diff --git a/cmd/erasure-utils.go b/cmd/erasure-utils.go index d01ea731a..814df1c47 100644 --- a/cmd/erasure-utils.go +++ b/cmd/erasure-utils.go @@ -24,8 +24,8 @@ import ( "sync" "github.com/klauspost/reedsolomon" - "github.com/minio/blake2b-simd" "github.com/minio/sha256-simd" + "golang.org/x/crypto/blake2b" ) // newHashWriters - inititialize a slice of hashes for the disk count. @@ -48,14 +48,14 @@ func newHash(algo string) (h hash.Hash) { // ignore the error, because New512 without a key never fails // New512 only returns a non-nil error, if the length of the passed // key > 64 bytes - but we use blake2b as hash function (no key) - h = blake2b.New512() + h, _ = blake2b.New512(nil) // Add new hashes here. default: // Default to blake2b. // ignore the error, because New512 without a key never fails // New512 only returns a non-nil error, if the length of the passed // key > 64 bytes - but we use blake2b as hash function (no key) - h = blake2b.New512() + h, _ = blake2b.New512(nil) } return h } diff --git a/vendor/github.com/minio/blake2b-simd/blake2b.go b/vendor/github.com/minio/blake2b-simd/blake2b.go deleted file mode 100644 index 538466a1a..000000000 --- a/vendor/github.com/minio/blake2b-simd/blake2b.go +++ /dev/null @@ -1,301 +0,0 @@ -// Written in 2012 by Dmitry Chestnykh. -// -// To the extent possible under law, the author have dedicated all copyright -// and related and neighboring rights to this software to the public domain -// worldwide. This software is distributed without any warranty. -// http://creativecommons.org/publicdomain/zero/1.0/ - -// Package blake2b implements BLAKE2b cryptographic hash function. -package blake2b - -import ( - "encoding/binary" - "errors" - "hash" -) - -const ( - BlockSize = 128 // block size of algorithm - Size = 64 // maximum digest size - SaltSize = 16 // maximum salt size - PersonSize = 16 // maximum personalization string size - KeySize = 64 // maximum size of key -) - -type digest struct { - h [8]uint64 // current chain value - t [2]uint64 // message bytes counter - f [2]uint64 // finalization flags - x [BlockSize]byte // buffer for data not yet compressed - nx int // number of bytes in buffer - - ih [8]uint64 // initial chain value (after config) - paddedKey [BlockSize]byte // copy of key, padded with zeros - isKeyed bool // indicates whether hash was keyed - size uint8 // digest size in bytes - isLastNode bool // indicates processing of the last node in tree hashing -} - -// Initialization values. -var iv = [8]uint64{ - 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, - 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, - 0x510e527fade682d1, 0x9b05688c2b3e6c1f, - 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, -} - -// Config is used to configure hash function parameters and keying. -// All parameters are optional. -type Config struct { - Size uint8 // digest size (if zero, default size of 64 bytes is used) - Key []byte // key for prefix-MAC - Salt []byte // salt (if < 16 bytes, padded with zeros) - Person []byte // personalization (if < 16 bytes, padded with zeros) - Tree *Tree // parameters for tree hashing -} - -// Tree represents parameters for tree hashing. -type Tree struct { - Fanout uint8 // fanout - MaxDepth uint8 // maximal depth - LeafSize uint32 // leaf maximal byte length (0 for unlimited) - NodeOffset uint64 // node offset (0 for first, leftmost or leaf) - NodeDepth uint8 // node depth (0 for leaves) - InnerHashSize uint8 // inner hash byte length - IsLastNode bool // indicates processing of the last node of layer -} - -var ( - defaultConfig = &Config{Size: Size} - config256 = &Config{Size: 32} -) - -func verifyConfig(c *Config) error { - if c.Size > Size { - return errors.New("digest size is too large") - } - if len(c.Key) > KeySize { - return errors.New("key is too large") - } - if len(c.Salt) > SaltSize { - // Smaller salt is okay: it will be padded with zeros. - return errors.New("salt is too large") - } - if len(c.Person) > PersonSize { - // Smaller personalization is okay: it will be padded with zeros. - return errors.New("personalization is too large") - } - if c.Tree != nil { - if c.Tree.Fanout == 1 { - return errors.New("fanout of 1 is not allowed in tree mode") - } - if c.Tree.MaxDepth < 2 { - return errors.New("incorrect tree depth") - } - if c.Tree.InnerHashSize < 1 || c.Tree.InnerHashSize > Size { - return errors.New("incorrect tree inner hash size") - } - } - return nil -} - -// New returns a new hash.Hash configured with the given Config. -// Config can be nil, in which case the default one is used, calculating 64-byte digest. -// Returns non-nil error if Config contains invalid parameters. -func New(c *Config) (hash.Hash, error) { - if c == nil { - c = defaultConfig - } else { - if c.Size == 0 { - // Set default size if it's zero. - c.Size = Size - } - if err := verifyConfig(c); err != nil { - return nil, err - } - } - d := new(digest) - d.initialize(c) - return d, nil -} - -// initialize initializes digest with the given -// config, which must be non-nil and verified. -func (d *digest) initialize(c *Config) { - // Create parameter block. - var p [BlockSize]byte - p[0] = c.Size - p[1] = uint8(len(c.Key)) - if c.Salt != nil { - copy(p[32:], c.Salt) - } - if c.Person != nil { - copy(p[48:], c.Person) - } - if c.Tree != nil { - p[2] = c.Tree.Fanout - p[3] = c.Tree.MaxDepth - binary.LittleEndian.PutUint32(p[4:], c.Tree.LeafSize) - binary.LittleEndian.PutUint64(p[8:], c.Tree.NodeOffset) - p[16] = c.Tree.NodeDepth - p[17] = c.Tree.InnerHashSize - } else { - p[2] = 1 - p[3] = 1 - } - - // Initialize. - d.size = c.Size - for i := 0; i < 8; i++ { - d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(p[i*8:]) - } - if c.Tree != nil && c.Tree.IsLastNode { - d.isLastNode = true - } - - // Process key. - if c.Key != nil { - copy(d.paddedKey[:], c.Key) - d.Write(d.paddedKey[:]) - d.isKeyed = true - } - // Save a copy of initialized state. - copy(d.ih[:], d.h[:]) -} - -// New512 returns a new hash.Hash computing the BLAKE2b 64-byte checksum. -func New512() hash.Hash { - d := new(digest) - d.initialize(defaultConfig) - return d -} - -// New256 returns a new hash.Hash computing the BLAKE2b 32-byte checksum. -func New256() hash.Hash { - d := new(digest) - d.initialize(config256) - return d -} - -// NewMAC returns a new hash.Hash computing BLAKE2b prefix- -// Message Authentication Code of the given size in bytes -// (up to 64) with the given key (up to 64 bytes in length). -func NewMAC(outBytes uint8, key []byte) hash.Hash { - d, err := New(&Config{Size: outBytes, Key: key}) - if err != nil { - panic(err.Error()) - } - return d -} - -// Reset resets the state of digest to the initial state -// after configuration and keying. -func (d *digest) Reset() { - copy(d.h[:], d.ih[:]) - d.t[0] = 0 - d.t[1] = 0 - d.f[0] = 0 - d.f[1] = 0 - d.nx = 0 - if d.isKeyed { - d.Write(d.paddedKey[:]) - } -} - -// Size returns the digest size in bytes. -func (d *digest) Size() int { return int(d.size) } - -// BlockSize returns the algorithm block size in bytes. -func (d *digest) BlockSize() int { return BlockSize } - -func (d *digest) Write(p []byte) (nn int, err error) { - nn = len(p) - left := BlockSize - d.nx - if len(p) > left { - // Process buffer. - copy(d.x[d.nx:], p[:left]) - p = p[left:] - compress(d, d.x[:]) - d.nx = 0 - } - // Process full blocks except for the last one. - if len(p) > BlockSize { - n := len(p) &^ (BlockSize - 1) - if n == len(p) { - n -= BlockSize - } - compress(d, p[:n]) - p = p[n:] - } - // Fill buffer. - d.nx += copy(d.x[d.nx:], p) - return -} - -// Sum returns the calculated checksum. -func (d *digest) Sum(in []byte) []byte { - // Make a copy of d so that caller can keep writing and summing. - d0 := *d - hash := d0.checkSum() - return append(in, hash[:d0.size]...) -} - -func (d *digest) checkSum() [Size]byte { - // Do not create unnecessary copies of the key. - if d.isKeyed { - for i := 0; i < len(d.paddedKey); i++ { - d.paddedKey[i] = 0 - } - } - - dec := BlockSize - uint64(d.nx) - if d.t[0] < dec { - d.t[1]-- - } - d.t[0] -= dec - - // Pad buffer with zeros. - for i := d.nx; i < len(d.x); i++ { - d.x[i] = 0 - } - // Set last block flag. - d.f[0] = 0xffffffffffffffff - if d.isLastNode { - d.f[1] = 0xffffffffffffffff - } - // Compress last block. - compress(d, d.x[:]) - - var out [Size]byte - j := 0 - for _, s := range d.h[:(d.size-1)/8+1] { - out[j+0] = byte(s >> 0) - out[j+1] = byte(s >> 8) - out[j+2] = byte(s >> 16) - out[j+3] = byte(s >> 24) - out[j+4] = byte(s >> 32) - out[j+5] = byte(s >> 40) - out[j+6] = byte(s >> 48) - out[j+7] = byte(s >> 56) - j += 8 - } - return out -} - -// Sum512 returns a 64-byte BLAKE2b hash of data. -func Sum512(data []byte) [64]byte { - var d digest - d.initialize(defaultConfig) - d.Write(data) - return d.checkSum() -} - -// Sum256 returns a 32-byte BLAKE2b hash of data. -func Sum256(data []byte) (out [32]byte) { - var d digest - d.initialize(config256) - d.Write(data) - sum := d.checkSum() - copy(out[:], sum[:32]) - return -} diff --git a/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.go b/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.go deleted file mode 100644 index ec53599f8..000000000 --- a/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.go +++ /dev/null @@ -1,47 +0,0 @@ -//+build !noasm -//+build !appengine - -/* - * Minio Cloud Storage, (C) 2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package blake2b - -//go:noescape -func compressAVX2Loop(p []uint8, in, iv, t, f, shffle, out []uint64) - -func compressAVX2(d *digest, p []uint8) { - var ( - in [8]uint64 - out [8]uint64 - shffle [8]uint64 - ) - - // vector for PSHUFB instruction - shffle[0] = 0x0201000706050403 - shffle[1] = 0x0a09080f0e0d0c0b - shffle[2] = 0x0201000706050403 - shffle[3] = 0x0a09080f0e0d0c0b - shffle[4] = 0x0100070605040302 - shffle[5] = 0x09080f0e0d0c0b0a - shffle[6] = 0x0100070605040302 - shffle[7] = 0x09080f0e0d0c0b0a - - in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] - - compressAVX2Loop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:]) - - d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7] -} diff --git a/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.s b/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.s deleted file mode 100644 index 24df234b5..000000000 --- a/vendor/github.com/minio/blake2b-simd/compressAvx2_amd64.s +++ /dev/null @@ -1,671 +0,0 @@ -//+build !noasm !appengine - -// -// Minio Cloud Storage, (C) 2016 Minio, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -// -// Based on AVX2 implementation from https://github.com/sneves/blake2-avx2/blob/master/blake2b-common.h -// -// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent -// -// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as: -// #define ROUND(r) \ -// LOAD_MSG_ ##r ##_1(b0, b1); \ -// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// LOAD_MSG_ ##r ##_2(b0, b1); \ -// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ -// LOAD_MSG_ ##r ##_3(b0, b1); \ -// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// LOAD_MSG_ ##r ##_4(b0, b1); \ -// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); -// -// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go -// -// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly) -// -// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and -// rounds 2 & 12 are identical) -// - -#define G1 \ - \ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); - BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc4 \ // VPADDQ YMM0,YMM0,YMM4 /* v0 += m[0], v1 += m[2], v2 += m[4], v3 += m[6] */ - BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc1 \ // VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */ - BYTE $0xc5; BYTE $0xe5; BYTE $0xef; BYTE $0xd8 \ // VPXOR YMM3,YMM3,YMM0 /* v12 ^= v0, v13 ^= v1, v14 ^= v2, v15 ^= v3 */ - BYTE $0xc5; BYTE $0xfd; BYTE $0x70; BYTE $0xdb; BYTE $0xb1 \ // VPSHUFD YMM3,YMM3,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = */ - BYTE $0xc5; BYTE $0xed; BYTE $0xd4; BYTE $0xd3 \ // VPADDQ YMM2,YMM2,YMM3 /* v8 += v12, v9 += v13, v10 += v14, v11 += v15 */ - BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xca \ // VPXOR YMM1,YMM1,YMM2 /* v4 ^= v8, v5 ^= v9, v6 ^= v10, v7 ^= v11 */ - BYTE $0xc4; BYTE $0xe2; BYTE $0x75; BYTE $0x00; BYTE $0xce // VPSHUFB YMM1,YMM1,YMM6 /* v4 = v4<<(64-24) | v4>>24, ..., ..., v7 = v7<<(64-24) | v7>>24 */ - -#define G2 \ - BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc5 \ // VPADDQ YMM0,YMM0,YMM5 /* v0 += m[1], v1 += m[3], v2 += m[5], v3 += m[7] */ - BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc1 \ // VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */ - BYTE $0xc5; BYTE $0xe5; BYTE $0xef; BYTE $0xd8 \ // VPXOR YMM3,YMM3,YMM0 /* v12 ^= v0, v13 ^= v1, v14 ^= v2, v15 ^= v3 */ - BYTE $0xc4; BYTE $0xe2; BYTE $0x65; BYTE $0x00; BYTE $0xdf \ // VPSHUFB YMM3,YMM3,YMM7 /* v12 = v12<<(64-16) | v12>>16, ..., ..., v15 = v15<<(64-16) | v15>>16 */ - BYTE $0xc5; BYTE $0xed; BYTE $0xd4; BYTE $0xd3 \ // VPADDQ YMM2,YMM2,YMM3 /* v8 += v12, v9 += v13, v10 += v14, v11 += v15 */ - BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xca \ // VPXOR YMM1,YMM1,YMM2 /* v4 ^= v8, v5 ^= v9, v6 ^= v10, v7 ^= v11 */ - BYTE $0xc5; BYTE $0x75; BYTE $0xd4; BYTE $0xf9 \ // VPADDQ YMM15,YMM1,YMM1 /* temp reg = reg*2 */ - BYTE $0xc5; BYTE $0xf5; BYTE $0x73; BYTE $0xd1; BYTE $0x3f \ // VPSRLQ YMM1,YMM1,0x3f /* reg = reg>>63 */ - BYTE $0xc4; BYTE $0xc1; BYTE $0x75; BYTE $0xef; BYTE $0xcf // VPXOR YMM1,YMM1,YMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */ - -#define DIAGONALIZE \ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb \ // VPERMQ YMM3, YMM3, 0x93 - BYTE $0x93 \ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2 \ // VPERMQ YMM2, YMM2, 0x4e - BYTE $0x4e \ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 \ // VPERMQ YMM1, YMM1, 0x39 - BYTE $0x39 \ - // DO NOT DELETE -- macro delimiter (previous line extended) - -#define UNDIAGONALIZE \ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb \ // VPERMQ YMM3, YMM3, 0x39 - BYTE $0x39 \ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2 \ // VPERMQ YMM2, YMM2, 0x4e - BYTE $0x4e \ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 \ // VPERMQ YMM1, YMM1, 0x93 - BYTE $0x93 \ - // DO NOT DELETE -- macro delimiter (previous line extended) - -#define LOAD_SHUFFLE \ - MOVQ shffle+120(FP), SI \ // SI: &shuffle - BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x36 \ // VMOVDQU YMM6, [rsi] - BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x7e; BYTE $0x20 // VMOVDQU YMM7, 32[rsi] - -// func compressAVX2Loop(compressSSE(p []uint8, in, iv, t, f, shffle, out []uint64) -TEXT ·compressAVX2Loop(SB), 7, $0 - - // REGISTER USE - // Y0 - Y3: v0 - v15 - // Y4 - Y5: m[0] - m[7] - // Y6 - Y7: shuffle value - // Y8 - Y9: temp registers - // Y10 -Y13: copy of full message - // Y15: temp register - - // Load digest - MOVQ in+24(FP), SI // SI: &in - BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x06 // VMOVDQU YMM0, [rsi] - BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x4e; BYTE $0x20 // VMOVDQU YMM1, 32[rsi] - - // Already store digest into &out (so we can reload it later generically) - MOVQ out+144(FP), SI // SI: &out - BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x06 // VMOVDQU [rsi], YMM0 - BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x4e; BYTE $0x20 // VMOVDQU 32[rsi], YMM1 - - // Initialize message pointer and loop counter - MOVQ message+0(FP), DX // DX: &p (message) - MOVQ message_len+8(FP), R8 // R8: len(message) - SHRQ $7, R8 // len(message) / 128 - CMPQ R8, $0 - JEQ complete - -loop: - // Increment counter - MOVQ t+72(FP), SI // SI: &t - MOVQ 0(SI), R9 // - ADDQ $128, R9 // /* d.t[0] += BlockSize */ - MOVQ R9, 0(SI) // - CMPQ R9, $128 // /* if d.t[0] < BlockSize { */ - JGE noincr // - MOVQ 8(SI), R9 // - ADDQ $1, R9 // /* d.t[1]++ */ - MOVQ R9, 8(SI) // -noincr: // /* } */ - - // Load initialization vector - MOVQ iv+48(FP), SI // SI: &iv - BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x16 // VMOVDQU YMM2, [rsi] - BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x5e; BYTE $0x20 // VMOVDQU YMM3, 32[rsi] - MOVQ t+72(FP), SI // SI: &t - BYTE $0xc4; BYTE $0x63; BYTE $0x3d; BYTE $0x38; BYTE $0x06 // VINSERTI128 YMM8, YMM8, [rsi], 0 /* Y8 = t[0]+t[1] */ - BYTE $0x00 - MOVQ t+96(FP), SI // SI: &f - BYTE $0xc4; BYTE $0x63; BYTE $0x3d; BYTE $0x38; BYTE $0x06 // VINSERTI128 YMM8, YMM8, [rsi], 1 /* Y8 = t[0]+t[1]+f[0]+f[1] */ - BYTE $0x01 - BYTE $0xc4; BYTE $0xc1; BYTE $0x65; BYTE $0xef; BYTE $0xd8 // VPXOR YMM3,YMM3,YMM8 /* Y3 = Y3 ^ Y8 */ - - BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x12 // VMOVDQU YMM10, [rdx] /* Y10 = m[0]+ m[1]+ m[2]+ m[3] */ - BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x5a; BYTE $0x20 // VMOVDQU YMM11, 32[rdx] /* Y11 = m[4]+ m[5]+ m[6]+ m[7] */ - BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x62; BYTE $0x40 // VMOVDQU YMM12, 64[rdx] /* Y12 = m[8]+ m[9]+m[10]+m[11] */ - BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x6a; BYTE $0x60 // VMOVDQU YMM13, 96[rdx] /* Y13 = m[12]+m[13]+m[14]+m[15] */ - - LOAD_SHUFFLE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6c; BYTE $0xe3 // VPUNPCKLQDQ YMM4, YMM10, YMM11 /* m[0], m[4], m[2], m[6] */ - BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6d; BYTE $0xeb // VPUNPCKHQDQ YMM5, YMM10, YMM11 /* m[1], m[5], m[3], m[7] */ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */ - BYTE $0xd8 - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */ - BYTE $0xd8 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6c; BYTE $0xe5 // VPUNPCKLQDQ YMM4, YMM12, YMM13 /* m[8], m[12], m[10], m[14] */ - BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6d; BYTE $0xed // VPUNPCKHQDQ YMM5, YMM12, YMM13 /* m[9], m[13], m[11], m[15] */ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */ - BYTE $0xd8 - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */ - BYTE $0xd8 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 2 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 /* m[4], ____, ____, m[14] */ - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x03 /* m[14], m[4], ____, ____ */ /* xxxx 0011 = 0x03 */ - BYTE $0x03 - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM12, YMM13 /* m[9], m[13], ____, ____ */ - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[9], m[13], ____, ____ */ /* 0010 0000 = 0x20 */ - BYTE $0x20 - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x02 /* m[10], m[8], ____, ____ */ /* xxxx 0010 = 0x02 */ - BYTE $0x02 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x30 /* ____, ____, m[15], ____ */ /* xx11 xxxx = 0x30 */ - BYTE $0x30 - BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM9, YMM11 /* ____, ____, m[15], m[6] */ - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */ - BYTE $0x30 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x01 /* m[1], m[0], ____, ____ */ /* xxxx 0001 = 0x01 */ - BYTE $0x01 - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 /* m[5], ____, ____, m[11] */ - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x03 /* m[11], m[5], ____, ____ */ /* xxxx 0011 = 0x03 */ - BYTE $0x03 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[1], m[0], m[11], m[5] */ /* 0010 0000 = 0x20 */ - BYTE $0x20 - - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM10, YMM13 /* ___, m[12], m[2], ____ */ - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x09 /* m[12], m[2], ____, ____ */ /* xxxx 1001 = 0x09 */ - BYTE $0x09 - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 /* ____, ____, m[7], m[3] */ - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */ - BYTE $0x30 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 3 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0x00 - BYTE $0x00 - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8 - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM11, YMM13 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21 - BYTE $0x21 - - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc2 // VPUNPCKLQDQ YMM8, YMM12, YMM10 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x55 - BYTE $0x55 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM10, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 - BYTE $0x30 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM12, YMM8 - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 - BYTE $0x31 - - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM13, YMM11 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x00 - BYTE $0x00 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 - BYTE $0x21 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 4 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM11, YMM10 - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM13, YMM12 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21 - BYTE $0x21 - - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM12, YMM10 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x08 - BYTE $0x08 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20 - BYTE $0x20 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0x55 - BYTE $0x55 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21 - BYTE $0x21 - - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc4 // VPUNPCKLQDQ YMM8, YMM11, YMM12 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 - BYTE $0x21 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 5 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM12, YMM11 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30 - BYTE $0x30 - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20 - BYTE $0x20 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM13, YMM8 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 - BYTE $0x31 - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0x00 - BYTE $0x00 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM10, YMM8 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x55 - BYTE $0x55 - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM12, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20 - BYTE $0x20 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 6 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21 - BYTE $0x21 - - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc4 // VPUNPCKLQDQ YMM8, YMM13, YMM12 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM12, YMM10 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 - BYTE $0x30 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM13, YMM10 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30 - BYTE $0x30 - - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM13, YMM11 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x55 - BYTE $0x55 - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM13, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 - BYTE $0x30 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 7 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x55 - BYTE $0x55 - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM13, YMM8 - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM13, YMM11 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30 - BYTE $0x30 - - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc5 // VPUNPCKHQDQ YMM8, YMM11, YMM13 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xaa - BYTE $0xaa - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM13, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20 - BYTE $0x20 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x01 - BYTE $0x01 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 - BYTE $0x20 - - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM11, YMM10 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM10, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31 - BYTE $0x31 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 8 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM13, YMM11 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM13, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 - BYTE $0x20 - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0xaa - BYTE $0xaa - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM10, YMM12 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 - BYTE $0x21 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc5 // VPUNPCKHQDQ YMM8, YMM11, YMM13 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xca // VPUNPCKLQDQ YMM9, YMM12, YMM10 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c - BYTE $0x0c - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 - BYTE $0x20 - - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11 - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM11, YMM12 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 - BYTE $0x30 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 9 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0x00 - BYTE $0x00 - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM12, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 - BYTE $0x31 - - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc4 // VPUNPCKHQDQ YMM8, YMM13, YMM12 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x00 - BYTE $0x00 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31 - BYTE $0x31 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xaa - BYTE $0xaa - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9 - BYTE $0xc4; BYTE $0xc3; BYTE $0x15; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM13, YMM9, 0x20 - BYTE $0x20 - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0xff - BYTE $0xff - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x04 - BYTE $0x04 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 - BYTE $0x21 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 10 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x20 - BYTE $0x20 - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 - BYTE $0x31 - - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31 - BYTE $0x31 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc4 // VPUNPCKHQDQ YMM8, YMM13, YMM12 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM10, YMM13 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60 - BYTE $0x60 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31 - BYTE $0x31 - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0xaa - BYTE $0xaa - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8 - BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xca // VPUNPCKLQDQ YMM9, YMM13, YMM10 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21 - BYTE $0x21 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 1 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6c; BYTE $0xe3 // VPUNPCKLQDQ YMM4, YMM10, YMM11 /* m[0], m[4], m[2], m[6] */ - BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6d; BYTE $0xeb // VPUNPCKHQDQ YMM5, YMM10, YMM11 /* m[1], m[5], m[3], m[7] */ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */ - BYTE $0xd8 - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */ - BYTE $0xd8 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6c; BYTE $0xe5 // VPUNPCKLQDQ YMM4, YMM12, YMM13 /* m[8], m[12], m[10], m[14] */ - BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6d; BYTE $0xed // VPUNPCKHQDQ YMM5, YMM12, YMM13 /* m[9], m[13], m[11], m[15] */ - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */ - BYTE $0xd8 - BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */ - BYTE $0xd8 - - G1 - G2 - - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 2 - /////////////////////////////////////////////////////////////////////////// - - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 /* m[4], ____, ____, m[14] */ - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x03 /* m[14], m[4], ____, ____ */ /* xxxx 0011 = 0x03 */ - BYTE $0x03 - BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM12, YMM13 /* m[9], m[13], ____, ____ */ - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[9], m[13], ____, ____ */ /* 0010 0000 = 0x20 */ - BYTE $0x20 - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x02 /* m[10], m[8], ____, ____ */ /* xxxx 0010 = 0x02 */ - BYTE $0x02 - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x30 /* ____, ____, m[15], ____ */ /* xx11 xxxx = 0x30 */ - BYTE $0x30 - BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM9, YMM11 /* ____, ____, m[15], m[6] */ - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */ - BYTE $0x30 - - G1 - G2 - - DIAGONALIZE - - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x01 /* m[1], m[0], ____, ____ */ /* xxxx 0001 = 0x01 */ - BYTE $0x01 - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 /* m[5], ____, ____, m[11] */ - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x03 /* m[11], m[5], ____, ____ */ /* xxxx 0011 = 0x03 */ - BYTE $0x03 - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[1], m[0], m[11], m[5] */ /* 0010 0000 = 0x20 */ - BYTE $0x20 - - BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM10, YMM13 /* ___, m[12], m[2], ____ */ - BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x09 /* m[12], m[2], ____, ____ */ /* xxxx 1001 = 0x09 */ - BYTE $0x09 - BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 /* ____, ____, m[7], m[3] */ - BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */ - BYTE $0x30 - - G1 - G2 - - UNDIAGONALIZE - - // Reload digest (most current value store in &out) - MOVQ out+144(FP), SI // SI: &in - BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x26 // VMOVDQU YMM12, [rsi] - BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x6e; BYTE $0x20 // VMOVDQU YMM13, 32[rsi] - - BYTE $0xc5; BYTE $0xfd; BYTE $0xef; BYTE $0xc2 // VPXOR YMM0,YMM0,YMM2 /* X0 = X0 ^ X4, X1 = X1 ^ X5 */ - BYTE $0xc4; BYTE $0xc1; BYTE $0x7d; BYTE $0xef; BYTE $0xc4 // VPXOR YMM0,YMM0,YMM12 /* X0 = X0 ^ X12, X1 = X1 ^ X13 */ - BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xcb // VPXOR YMM1,YMM1,YMM3 /* X2 = X2 ^ X6, X3 = X3 ^ X7 */ - BYTE $0xc4; BYTE $0xc1; BYTE $0x75; BYTE $0xef; BYTE $0xcd // VPXOR YMM1,YMM1,YMM13 /* X2 = X2 ^ X14, X3 = X3 ^ X15 */ - - // Store digest into &out - MOVQ out+144(FP), SI // SI: &out - BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x06 // VMOVDQU [rsi], YMM0 - BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x4e; BYTE $0x20 // VMOVDQU 32[rsi], YMM1 - - // Increment message pointer and check if there's more to do - ADDQ $128, DX // message += 128 - SUBQ $1, R8 - JNZ loop - -complete: - BYTE $0xc5; BYTE $0xf8; BYTE $0x77 // VZEROUPPER /* Prevent further context switches */ - RET - diff --git a/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.go b/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.go deleted file mode 100644 index cfa12c04f..000000000 --- a/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.go +++ /dev/null @@ -1,41 +0,0 @@ -//+build !noasm -//+build !appengine - -/* - * Minio Cloud Storage, (C) 2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package blake2b - -//go:noescape -func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64) - -func compressAVX(d *digest, p []uint8) { - var ( - in [8]uint64 - out [8]uint64 - shffle [2]uint64 - ) - - // vector for PSHUFB instruction - shffle[0] = 0x0201000706050403 - shffle[1] = 0x0a09080f0e0d0c0b - - in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] - - blockAVXLoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:]) - - d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7] -} diff --git a/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.s b/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.s deleted file mode 100644 index f68e17392..000000000 --- a/vendor/github.com/minio/blake2b-simd/compressAvx_amd64.s +++ /dev/null @@ -1,682 +0,0 @@ -//+build !noasm !appengine - -// -// Minio Cloud Storage, (C) 2016 Minio, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -// -// Based on SSE implementation from https://github.com/BLAKE2/BLAKE2/blob/master/sse/blake2b.c -// -// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent -// -// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as: -// #define ROUND(r) \ -// LOAD_MSG_ ##r ##_1(b0, b1); \ -// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// LOAD_MSG_ ##r ##_2(b0, b1); \ -// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ -// LOAD_MSG_ ##r ##_3(b0, b1); \ -// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// LOAD_MSG_ ##r ##_4(b0, b1); \ -// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); -// -// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go -// -// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly) -// -// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and -// rounds 2 & 12 are identical) -// - -#define G1 \ - \ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); - LONG $0xd479c1c4; BYTE $0xc0 \ // VPADDQ XMM0,XMM0,XMM8 /* v0 += m[0], v1 += m[2] */ - LONG $0xd471c1c4; BYTE $0xc9 \ // VPADDQ XMM1,XMM1,XMM9 /* v2 += m[4], v3 += m[6] */ - LONG $0xc2d4f9c5 \ // VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */ - LONG $0xcbd4f1c5 \ // VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */ - LONG $0xf0efc9c5 \ // VPXOR XMM6,XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */ - LONG $0xf9efc1c5 \ // VPXOR XMM7,XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */ - LONG $0xf670f9c5; BYTE $0xb1 \ // VPSHUFD XMM6,XMM6,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = v13<<(64-32) | v13>>32 */ - LONG $0xff70f9c5; BYTE $0xb1 \ // VPSHUFD XMM7,XMM7,0xb1 /* v14 = v14<<(64-32) | v14>>32, v15 = v15<<(64-32) | v15>>32 */ - LONG $0xe6d4d9c5 \ // VPADDQ XMM4,XMM4,XMM6 /* v8 += v12, v9 += v13 */ - LONG $0xefd4d1c5 \ // VPADDQ XMM5,XMM5,XMM7 /* v10 += v14, v11 += v15 */ - LONG $0xd4efe9c5 \ // VPXOR XMM2,XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */ - LONG $0xddefe1c5 \ // VPXOR XMM3,XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */ - LONG $0x0069c2c4; BYTE $0xd4 \ // VPSHUFB XMM2,XMM2,XMM12 /* v4 = v4<<(64-24) | v4>>24, v5 = v5<<(64-24) | v5>>24 */ - LONG $0x0061c2c4; BYTE $0xdc // VPSHUFB XMM3,XMM3,XMM12 /* v6 = v6<<(64-24) | v6>>24, v7 = v7<<(64-24) | v7>>24 */ - -#define G2 \ - \ // G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); - LONG $0xd479c1c4; BYTE $0xc2 \ // VPADDQ XMM0,XMM0,XMM10 /* v0 += m[1], v1 += m[3] */ - LONG $0xd471c1c4; BYTE $0xcb \ // VPADDQ XMM1,XMM1,XMM11 /* v2 += m[5], v3 += m[7] */ - LONG $0xc2d4f9c5 \ // VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */ - LONG $0xcbd4f1c5 \ // VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */ - LONG $0xf0efc9c5 \ // VPXOR XMM6,XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */ - LONG $0xf9efc1c5 \ // VPXOR XMM7,XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */ - LONG $0xf670fbc5; BYTE $0x39 \ // VPSHUFLW XMM6,XMM6,0x39 /* combined with next ... */ - LONG $0xf670fac5; BYTE $0x39 \ // VPSHUFHW XMM6,XMM6,0x39 /* v12 = v12<<(64-16) | v12>>16, v13 = v13<<(64-16) | v13>>16 */ - LONG $0xff70fbc5; BYTE $0x39 \ // VPSHUFLW XMM7,XMM7,0x39 /* combined with next ... */ - LONG $0xff70fac5; BYTE $0x39 \ // VPSHUFHW XMM7,XMM7,0x39 /* v14 = v14<<(64-16) | v14>>16, v15 = v15<<(64-16) | v15>>16 */ - LONG $0xe6d4d9c5 \ // VPADDQ XMM4,XMM4,XMM6 /* v8 += v12, v9 += v13 */ - LONG $0xefd4d1c5 \ // VPADDQ XMM5,XMM5,XMM7 /* v10 += v14, v11 += v15 */ - LONG $0xd4efe9c5 \ // VPXOR XMM2,XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */ - LONG $0xddefe1c5 \ // VPXOR XMM3,XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */ - LONG $0xfad469c5 \ // VPADDQ XMM15,XMM2,XMM2 /* temp reg = reg*2 */ - LONG $0xd273e9c5; BYTE $0x3f \ // VPSRLQ XMM2,XMM2,0x3f /* reg = reg>>63 */ - LONG $0xef69c1c4; BYTE $0xd7 \ // VPXOR XMM2,XMM2,XMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */ - LONG $0xfbd461c5 \ // VPADDQ XMM15,XMM3,XMM3 /* temp reg = reg*2 */ - LONG $0xd373e1c5; BYTE $0x3f \ // VPSRLQ XMM3,XMM3,0x3f /* reg = reg>>63 */ - LONG $0xef61c1c4; BYTE $0xdf // VPXOR XMM3,XMM3,XMM15 /* ORed together: v6 = v6<<(64-63) | v6>>63, v7 = v7<<(64-63) | v7>>63 */ - -#define DIAGONALIZE \ - \ // DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); - MOVOU X6, X13 \ /* t0 = row4l;\ */ - MOVOU X2, X14 \ /* t1 = row2l;\ */ - MOVOU X4, X6 \ /* row4l = row3l;\ */ - MOVOU X5, X4 \ /* row3l = row3h;\ */ - MOVOU X6, X5 \ /* row3h = row4l;\ */ - LONG $0x6c1141c4; BYTE $0xfd \ // VPUNPCKLQDQ XMM15, XMM13, XMM13 /* _mm_unpacklo_epi64(t0, t0) */ - LONG $0x6d41c1c4; BYTE $0xf7 \ // VPUNPCKHQDQ XMM6, XMM7, XMM15 /* row4l = _mm_unpackhi_epi64(row4h, ); \ */ - LONG $0xff6c41c5 \ // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */ - LONG $0x6d11c1c4; BYTE $0xff \ // VPUNPCKHQDQ XMM7, XMM13, XMM15 /* row4h = _mm_unpackhi_epi64(t0, ); \ */ - LONG $0xfb6c61c5 \ // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */ - LONG $0x6d69c1c4; BYTE $0xd7 \ // VPUNPCKHQDQ XMM2, XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2l, ); \ */ - LONG $0x6c0941c4; BYTE $0xfe \ // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */ - LONG $0x6d61c1c4; BYTE $0xdf // VPUNPCKHQDQ XMM3, XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */ - -#define UNDIAGONALIZE \ - \ // UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); - MOVOU X4, X13 \ /* t0 = row3l;\ */ - MOVOU X5, X4 \ /* row3l = row3h;\ */ - MOVOU X13, X5 \ /* row3h = t0;\ */ - MOVOU X2, X13 \ /* t0 = row2l;\ */ - MOVOU X6, X14 \ /* t1 = row4l;\ */ - LONG $0xfa6c69c5 \ // VPUNPCKLQDQ XMM15, XMM2, XMM2 /* _mm_unpacklo_epi64(row2l, row2l) */ - LONG $0x6d61c1c4; BYTE $0xd7 \ // VPUNPCKHQDQ XMM2, XMM3, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, ); \ */ - LONG $0xfb6c61c5 \ // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */ - LONG $0x6d11c1c4; BYTE $0xdf \ // VPUNPCKHQDQ XMM3, XMM13, XMM15 /* row2h = _mm_unpackhi_epi64(t0, ); \ */ - LONG $0xff6c41c5 \ // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */ - LONG $0x6d49c1c4; BYTE $0xf7 \ // VPUNPCKHQDQ XMM6, XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, ); \ */ - LONG $0x6c0941c4; BYTE $0xfe \ // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */ - LONG $0x6d41c1c4; BYTE $0xff // VPUNPCKHQDQ XMM7, XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, ) */ - -#define LOAD_SHUFFLE \ - \ // Load shuffle value - MOVQ shffle+120(FP), SI \ // SI: &shuffle - MOVOU 0(SI), X12 // X12 = 03040506 07000102 0b0c0d0e 0f08090a - -// func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64) -TEXT ·blockAVXLoop(SB), 7, $0 - // REGISTER USE - // R8: loop counter - // DX: message pointer - // SI: temp pointer for loading - // X0 - X7: v0 - v15 - // X8 - X11: m[0] - m[7] - // X12: shuffle value - // X13 - X15: temp registers - - // Load digest - MOVQ in+24(FP), SI // SI: &in - MOVOU 0(SI), X0 // X0 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */ - MOVOU 16(SI), X1 // X1 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */ - MOVOU 32(SI), X2 // X2 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */ - MOVOU 48(SI), X3 // X3 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */ - - // Already store digest into &out (so we can reload it later generically) - MOVQ out+144(FP), SI // SI: &out - MOVOU X0, 0(SI) // out[0]+out[1] = X0 - MOVOU X1, 16(SI) // out[2]+out[3] = X1 - MOVOU X2, 32(SI) // out[4]+out[5] = X2 - MOVOU X3, 48(SI) // out[6]+out[7] = X3 - - // Initialize message pointer and loop counter - MOVQ message+0(FP), DX // DX: &p (message) - MOVQ message_len+8(FP), R8 // R8: len(message) - SHRQ $7, R8 // len(message) / 128 - CMPQ R8, $0 - JEQ complete - -loop: - // Increment counter - MOVQ t+72(FP), SI // SI: &t - MOVQ 0(SI), R9 - ADDQ $128, R9 // /* d.t[0] += BlockSize */ - MOVQ R9, 0(SI) - CMPQ R9, $128 // /* if d.t[0] < BlockSize { */ - JGE noincr - MOVQ 8(SI), R9 - ADDQ $1, R9 // /* d.t[1]++ */ - MOVQ R9, 8(SI) -noincr: // /* } */ - - // Load initialization vector - MOVQ iv+48(FP), SI // SI: &iv - MOVOU 0(SI), X4 // X4 = iv[0]+iv[1] /* row3l = LOAD( &blake2b_IV[0] ); */ - MOVOU 16(SI), X5 // X5 = iv[2]+iv[3] /* row3h = LOAD( &blake2b_IV[2] ); */ - MOVOU 32(SI), X6 // X6 = iv[4]+iv[5] /* LOAD( &blake2b_IV[4] ) */ - MOVOU 48(SI), X7 // X7 = iv[6]+iv[7] /* LOAD( &blake2b_IV[6] ) */ - MOVQ t+72(FP), SI // SI: &t - MOVOU 0(SI), X8 // X8 = t[0]+t[1] /* LOAD( &S->t[0] ) */ - PXOR X8, X6 // X6 = X6 ^ X8 /* row4l = _mm_xor_si128( , ); */ - MOVQ t+96(FP), SI // SI: &f - MOVOU 0(SI), X8 // X8 = f[0]+f[1] /* LOAD( &S->f[0] ) */ - PXOR X8, X7 // X7 = X7 ^ X8 /* row4h = _mm_xor_si128( , ); */ - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+m[1] - MOVOU 16(DX), X13 // X13 = m[2]+m[3] - MOVOU 32(DX), X14 // X14 = m[4]+m[5] - MOVOU 48(DX), X15 // X15 = m[6]+m[7] - LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[2] */ - LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[4], m[6] */ - LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[1], m[3] */ - LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[5], m[7] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 64(DX), X12 // X12 = m[8]+ m[9] - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[8],m[10] */ - LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[12],m[14] */ - LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[9],m[11] */ - LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[13],m[15] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 2 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 112(DX), X12 // X12 = m[14]+m[15] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 96(DX), X15 // X15 = m[12]+m[13] - LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[14], m[4] */ - LONG $0x6d0941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM14, XMM15 /* m[9], m[13] */ - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU 48(DX), X15 // X15 = m[6]+ m[7] - LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[10], m[8] */ - LONG $0x0f0143c4; WORD $0x08dc // VPALIGNR XMM11, XMM15, XMM12, 0x8 /* m[15], m[6] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - LONG $0x0f1943c4; WORD $0x08c4 // VPALIGNR XMM8, XMM12, XMM12, 0x8 /* m[1], m[0] */ - LONG $0x6d0941c4; BYTE $0xcd // VPUNPCKHQDQ XMM9, XMM14, XMM13 /* m[11], m[5] */ - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[2] */ - LONG $0x6d1141c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM13, XMM12 /* m[7], m[3] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 3 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 32(DX), X12 // X12 = m[4]+ m[5] - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x0f0943c4; WORD $0x08c5 // VPALIGNR XMM8, XMM14, XMM13, 0x8 /* m[11], m[12] */ - LONG $0x6d1941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM12, XMM15 /* m[5], m[15] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 16(DX), X13 // X13 = m[2]+ m[3] - MOVOU 64(DX), X15 // X15 = m[8]+ m[9] - LONG $0x6c0141c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM15, XMM12 /* m[8], m[0] */ - LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[13] */ - LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[2], ___ */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - LONG $0x6d1941c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM12, XMM12 /* ___, m[3] */ - LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[10], ___ */ - LONG $0x6d1141c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM13, XMM14 /* m[7], m[9] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X14 // X14 = m[4]+ m[5] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6c0141c4; BYTE $0xd5 // VPUNPCKLQDQ XMM10, XMM15, XMM13 /* m[14], m[6] */ - LONG $0x0f0943c4; WORD $0x08dc // VPALIGNR XMM11, XMM14, XMM12, 0x8 /* m[1], m[4] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 4 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 96(DX), X15 // X15 = m[12]+m[13] - LONG $0x6d1141c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM13, XMM12 /* m[7], m[3] */ - LONG $0x6d0141c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM15, XMM14 /* m[13], m[11] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 64(DX), X13 // X13 = m[8]+ m[9] - MOVOU 112(DX), X14 // X14 = m[14]+m[15] - LONG $0x6d1141c4; BYTE $0xd4 // VPUNPCKHQDQ XMM10, XMM13, XMM12 /* m[9], m[1] */ - LONG $0x6c0141c4; BYTE $0xde // VPUNPCKLQDQ XMM11, XMM15, XMM14 /* m[12], m[14] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d1141c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM13, XMM13 /* ___, m[5] */ - LONG $0x6c1941c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM12, XMM8 /* m[2], ____ */ - LONG $0x6d0141c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM15, XMM15 /* ___, m[15] */ - LONG $0x6c1141c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM13, XMM9 /* m[4], ____ */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 64(DX), X15 // X15 = m[8]+ m[9] - LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[6], m[10] */ - LONG $0x6c1941c4; BYTE $0xdf // VPUNPCKLQDQ XMM11, XMM12, XMM15 /* m[0], m[8] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 5 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - LONG $0x6d0941c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM14, XMM13 /* m[9], m[5] */ - LONG $0x6c1941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM12, XMM15 /* m[2], m[10] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X14 // X14 = m[6]+ m[7] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d0941c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM14, XMM14 /* ___, m[7] */ - LONG $0x6c1941c4; BYTE $0xd2 // VPUNPCKLQDQ XMM10, XMM12, XMM10 /* m[0], ____ */ - LONG $0x6d0141c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM15, XMM15 /* ___, m[15] */ - LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[4], ____ */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d0941c4; BYTE $0xc6 // VPUNPCKHQDQ XMM8, XMM14, XMM14 /* ___, m[11] */ - LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[14], ____ */ - LONG $0x6d1941c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM12, XMM12 /* ___, m[3] */ - LONG $0x6c1141c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM13, XMM9 /* m[6], ____ */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 64(DX), X13 // X13 = m[8]+ m[9] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - LONG $0x0f0943c4; WORD $0x08d4 // VPALIGNR XMM10, XMM14, XMM12, 0x8 /* m[1], m[12] */ - LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[13] */ - LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[8], ____ */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 6 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 16(DX), X13 // X13 = m[2]+ m[3] - MOVOU 48(DX), X14 // X14 = m[6]+ m[7] - MOVOU 64(DX), X15 // X15 = m[8]+ m[9] - LONG $0x6c1141c4; BYTE $0xc6 // VPUNPCKLQDQ XMM8, XMM13, XMM14 /* m[2], m[6] */ - LONG $0x6c1941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM12, XMM15 /* m[0], m[8] */ - MOVOU 80(DX), X12 // X12 = m[10]+m[11] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[10] */ - LONG $0x6d1941c4; BYTE $0xdd // VPUNPCKHQDQ XMM11, XMM12, XMM13 /* m[11], m[3] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 48(DX), X14 // X14 = m[6]+ m[7] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d0941c4; BYTE $0xc6 // VPUNPCKHQDQ XMM8, XMM14, XMM14 /* ___, m[7] */ - LONG $0x6c1141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM13, XMM8 /* m[4], ____ */ - LONG $0x6d0141c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM15, XMM12 /* m[15], m[1] */ - MOVOU 64(DX), X12 // X12 = m[8]+ m[9] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - LONG $0x6d0941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM14, XMM13 /* m[13], m[5] */ - LONG $0x6d1941c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM12, XMM12 /* ___, m[9] */ - LONG $0x6c0141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM15, XMM11 /* m[14], ____ */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 7 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d1941c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM12, XMM12 /* ___, m[1] */ - LONG $0x6c0941c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM14, XMM8 /* m[12], ____ */ - LONG $0x6c0141c4; BYTE $0xcd // VPUNPCKLQDQ XMM9, XMM15, XMM13 /* m[14], m[4] */ - MOVOU 80(DX), X12 // X12 = m[10]+m[11] - LONG $0x6d1141c4; BYTE $0xd7 // VPUNPCKHQDQ XMM10, XMM13, XMM15 /* m[5], m[15] */ - LONG $0x0f1943c4; WORD $0x08de // VPALIGNR XMM11, XMM12, XMM14, 0x8 /* m[13], m[10] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[6] */ - LONG $0x0f0943c4; WORD $0x08ce // VPALIGNR XMM9, XMM14, XMM14, 0x8 /* m[9], m[8] */ - MOVOU 16(DX), X14 // X14 = m[2]+ m[3] - LONG $0x6d1141c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM13, XMM14 /* m[7], m[3] */ - LONG $0x6d0141c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM15, XMM15 /* ___, m[11] */ - LONG $0x6c0941c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM14, XMM11 /* m[2], ____ */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 8 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d0941c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM14, XMM13 /* m[13], m[7] */ - LONG $0x6d1941c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM12, XMM12 /* ___, m[3] */ - LONG $0x6c0941c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM14, XMM9 /* m[12], ____ */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 64(DX), X13 // X13 = m[8]+ m[9] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - LONG $0x0f0143c4; WORD $0x08d6 // VPALIGNR XMM10, XMM15, XMM14, 0x8 /* m[11], m[14] */ - LONG $0x6d1941c4; BYTE $0xdd // VPUNPCKHQDQ XMM11, XMM12, XMM13 /* m[1], m[9] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d1141c4; BYTE $0xc7 // VPUNPCKHQDQ XMM8, XMM13, XMM15 /* m[5], m[15] */ - LONG $0x6c0941c4; BYTE $0xcc // VPUNPCKLQDQ XMM9, XMM14, XMM12 /* m[8], m[2] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X14 // X14 = m[6]+ m[7] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - LONG $0x6c1941c4; BYTE $0xd5 // VPUNPCKLQDQ XMM10, XMM12, XMM13 /* m[0], m[4] */ - LONG $0x6c0941c4; BYTE $0xdf // VPUNPCKLQDQ XMM11, XMM14, XMM15 /* m[6], m[10] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 9 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6c1141c4; BYTE $0xc7 // VPUNPCKLQDQ XMM8, XMM13, XMM15 /* m[6], m[14] */ - LONG $0x0f1943c4; WORD $0x08ce // VPALIGNR XMM9, XMM12, XMM14, 0x8 /* m[11], m[0] */ - MOVOU 16(DX), X13 // X13 = m[2]+ m[3] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - LONG $0x6d0141c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM15, XMM14 /* m[15], m[9] */ - LONG $0x0f0943c4; WORD $0x08dd // VPALIGNR XMM11, XMM14, XMM13, 0x8 /* m[3], m[8] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 16(DX), X13 // X13 = m[2]+ m[3] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 96(DX), X15 // X15 = m[12]+m[13] - LONG $0x6d0141c4; BYTE $0xc7 // VPUNPCKHQDQ XMM8, XMM15, XMM15 /* ___, m[13] */ - LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[12], ____ */ - LONG $0x0f0943c4; WORD $0x08cc // VPALIGNR XMM9, XMM14, XMM12, 0x8 /* m[1], m[10] */ - MOVOU 32(DX), X12 // X12 = m[4]+ m[5] - MOVOU 48(DX), X15 // X15 = m[6]+ m[7] - LONG $0x6d0141c4; BYTE $0xd7 // VPUNPCKHQDQ XMM10, XMM15, XMM15 /* ___, m[7] */ - LONG $0x6c1141c4; BYTE $0xd2 // VPUNPCKLQDQ XMM10, XMM13, XMM10 /* m[2], ____ */ - LONG $0x6d1941c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM12, XMM12 /* ___, m[5] */ - LONG $0x6c1941c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM12, XMM11 /* m[4], ____ */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 0 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - LONG $0x6c0141c4; BYTE $0xc6 // VPUNPCKLQDQ XMM8, XMM15, XMM14 /* m[10], m[8] */ - LONG $0x6d1141c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM13, XMM12 /* m[7], m[1] */ - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 32(DX), X14 // X14 = m[4]+ m[5] - LONG $0x6c1941c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM12, XMM14 /* m[2], m[4] */ - LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[5] */ - LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[6], ____ */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 64(DX), X13 // X13 = m[8]+ m[9] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d0141c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM15, XMM13 /* m[15], m[9] */ - LONG $0x6d1941c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM12, XMM14 /* m[3], m[13] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - LONG $0x0f0143c4; WORD $0x08d5 // VPALIGNR XMM10, XMM15, XMM13, 0x8 /* m[11], m[14] */ - LONG $0x6c0941c4; BYTE $0xdc // VPUNPCKLQDQ XMM11, XMM14, XMM12 /* m[12], m[0] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 1 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+m[1] - MOVOU 16(DX), X13 // X13 = m[2]+m[3] - MOVOU 32(DX), X14 // X14 = m[4]+m[5] - MOVOU 48(DX), X15 // X15 = m[6]+m[7] - LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[2] */ - LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[4], m[6] */ - LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[1], m[3] */ - LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[5], m[7] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 64(DX), X12 // X12 = m[8]+ m[9] - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[8],m[10] */ - LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[12],m[14] */ - LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[9],m[11] */ - LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[13],m[15] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 2 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 112(DX), X12 // X12 = m[14]+m[15] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 96(DX), X15 // X15 = m[12]+m[13] - LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[14], m[4] */ - LONG $0x6d0941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM14, XMM15 /* m[9], m[13] */ - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU 48(DX), X15 // X15 = m[6]+ m[7] - LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[10], m[8] */ - LONG $0x0f0143c4; WORD $0x08dc // VPALIGNR XMM11, XMM15, XMM12, 0x8 /* m[15], m[6] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - LONG $0x0f1943c4; WORD $0x08c4 // VPALIGNR XMM8, XMM12, XMM12, 0x8 /* m[1], m[0] */ - LONG $0x6d0941c4; BYTE $0xcd // VPUNPCKHQDQ XMM9, XMM14, XMM13 /* m[11], m[5] */ - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[2] */ - LONG $0x6d1141c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM13, XMM12 /* m[7], m[3] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - // Reload digest (most current value store in &out) - MOVQ out+144(FP), SI // SI: &in - MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */ - MOVOU 16(SI), X13 // X13 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */ - MOVOU 32(SI), X14 // X14 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */ - MOVOU 48(SI), X15 // X15 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */ - - // Final computations and prepare for storing - PXOR X4, X0 // X0 = X0 ^ X4 /* row1l = _mm_xor_si128( row3l, row1l ); */ - PXOR X5, X1 // X1 = X1 ^ X5 /* row1h = _mm_xor_si128( row3h, row1h ); */ - PXOR X12, X0 // X0 = X0 ^ X12 /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */ - PXOR X13, X1 // X1 = X1 ^ X13 /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */ - PXOR X6, X2 // X2 = X2 ^ X6 /* row2l = _mm_xor_si128( row4l, row2l ); */ - PXOR X7, X3 // X3 = X3 ^ X7 /* row2h = _mm_xor_si128( row4h, row2h ); */ - PXOR X14, X2 // X2 = X2 ^ X14 /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */ - PXOR X15, X3 // X3 = X3 ^ X15 /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */ - - // Store digest into &out - MOVQ out+144(FP), SI // SI: &out - MOVOU X0, 0(SI) // out[0]+out[1] = X0 - MOVOU X1, 16(SI) // out[2]+out[3] = X1 - MOVOU X2, 32(SI) // out[4]+out[5] = X2 - MOVOU X3, 48(SI) // out[6]+out[7] = X3 - - // Increment message pointer and check if there's more to do - ADDQ $128, DX // message += 128 - SUBQ $1, R8 - JNZ loop - -complete: - RET diff --git a/vendor/github.com/minio/blake2b-simd/compressSse_amd64.go b/vendor/github.com/minio/blake2b-simd/compressSse_amd64.go deleted file mode 100644 index d539a7ade..000000000 --- a/vendor/github.com/minio/blake2b-simd/compressSse_amd64.go +++ /dev/null @@ -1,41 +0,0 @@ -//+build !noasm -//+build !appengine - -/* - * Minio Cloud Storage, (C) 2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package blake2b - -//go:noescape -func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64) - -func compressSSE(d *digest, p []uint8) { - var ( - in [8]uint64 - out [8]uint64 - shffle [2]uint64 - ) - - // vector for PSHUFB instruction - shffle[0] = 0x0201000706050403 - shffle[1] = 0x0a09080f0e0d0c0b - - in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] - - blockSSELoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:]) - - d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7] -} diff --git a/vendor/github.com/minio/blake2b-simd/compressSse_amd64.s b/vendor/github.com/minio/blake2b-simd/compressSse_amd64.s deleted file mode 100644 index 6f31c949e..000000000 --- a/vendor/github.com/minio/blake2b-simd/compressSse_amd64.s +++ /dev/null @@ -1,770 +0,0 @@ -//+build !noasm !appengine - -// -// Minio Cloud Storage, (C) 2016 Minio, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -// -// Based on SSE implementation from https://github.com/BLAKE2/BLAKE2/blob/master/sse/blake2b.c -// -// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent -// -// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as: -// #define ROUND(r) \ -// LOAD_MSG_ ##r ##_1(b0, b1); \ -// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// LOAD_MSG_ ##r ##_2(b0, b1); \ -// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ -// LOAD_MSG_ ##r ##_3(b0, b1); \ -// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// LOAD_MSG_ ##r ##_4(b0, b1); \ -// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ -// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); -// -// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go -// -// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly) -// -// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and -// rounds 2 & 12 are identical) -// - -#define G1 \ - \ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); - LONG $0xd40f4166; BYTE $0xc0 \ // PADDQ XMM0,XMM8 /* v0 += m[0], v1 += m[2] */ - LONG $0xd40f4166; BYTE $0xc9 \ // PADDQ XMM1,XMM9 /* v2 += m[4], v3 += m[6] */ - LONG $0xc2d40f66 \ // PADDQ XMM0,XMM2 /* v0 += v4, v1 += v5 */ - LONG $0xcbd40f66 \ // PADDQ XMM1,XMM3 /* v2 += v6, v3 += v7 */ - LONG $0xf0ef0f66 \ // PXOR XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */ - LONG $0xf9ef0f66 \ // PXOR XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */ - LONG $0xf6700f66; BYTE $0xb1 \ // PSHUFD XMM6,XMM6,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = v13<<(64-32) | v13>>32 */ - LONG $0xff700f66; BYTE $0xb1 \ // PSHUFD XMM7,XMM7,0xb1 /* v14 = v14<<(64-32) | v14>>32, v15 = v15<<(64-32) | v15>>32 */ - LONG $0xe6d40f66 \ // PADDQ XMM4,XMM6 /* v8 += v12, v9 += v13 */ - LONG $0xefd40f66 \ // PADDQ XMM5,XMM7 /* v10 += v14, v11 += v15 */ - LONG $0xd4ef0f66 \ // PXOR XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */ - LONG $0xddef0f66 \ // PXOR XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */ - LONG $0x380f4166; WORD $0xd400 \ // PSHUFB XMM2,XMM12 /* v4 = v4<<(64-24) | v4>>24, v5 = v5<<(64-24) | v5>>24 */ - LONG $0x380f4166; WORD $0xdc00 // PSHUFB XMM3,XMM12 /* v6 = v6<<(64-24) | v6>>24, v7 = v7<<(64-24) | v7>>24 */ - -#define G2 \ - \ // G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); - LONG $0xd40f4166; BYTE $0xc2 \ // PADDQ XMM0,XMM10 /* v0 += m[1], v1 += m[3] */ - LONG $0xd40f4166; BYTE $0xcb \ // PADDQ XMM1,XMM11 /* v2 += m[5], v3 += m[7] */ - LONG $0xc2d40f66 \ // PADDQ XMM0,XMM2 /* v0 += v4, v1 += v5 */ - LONG $0xcbd40f66 \ // PADDQ XMM1,XMM3 /* v2 += v6, v3 += v7 */ - LONG $0xf0ef0f66 \ // PXOR XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */ - LONG $0xf9ef0f66 \ // PXOR XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */ - LONG $0xf6700ff2; BYTE $0x39 \ // PSHUFLW XMM6,XMM6,0x39 /* combined with next ... */ - LONG $0xf6700ff3; BYTE $0x39 \ // PSHUFHW XMM6,XMM6,0x39 /* v12 = v12<<(64-16) | v12>>16, v13 = v13<<(64-16) | v13>>16 */ - LONG $0xff700ff2; BYTE $0x39 \ // PSHUFLW XMM7,XMM7,0x39 /* combined with next ... */ - LONG $0xff700ff3; BYTE $0x39 \ // PSHUFHW XMM7,XMM7,0x39 /* v14 = v14<<(64-16) | v14>>16, v15 = v15<<(64-16) | v15>>16 */ - LONG $0xe6d40f66 \ // PADDQ XMM4,XMM6 /* v8 += v12, v9 += v13 */ - LONG $0xefd40f66 \ // PADDQ XMM5,XMM7 /* v10 += v14, v11 += v15 */ - LONG $0xd4ef0f66 \ // PXOR XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */ - LONG $0xddef0f66 \ // PXOR XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */ - MOVOU X2, X15 \ - LONG $0xd40f4466; BYTE $0xfa \ // PADDQ XMM15,XMM2 /* temp reg = reg*2 */ - LONG $0xd2730f66; BYTE $0x3f \ // PSRLQ XMM2,0x3f /* reg = reg>>63 */ - LONG $0xef0f4166; BYTE $0xd7 \ // PXOR XMM2,XMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */ - MOVOU X3, X15 \ - LONG $0xd40f4466; BYTE $0xfb \ // PADDQ XMM15,XMM3 /* temp reg = reg*2 */ - LONG $0xd3730f66; BYTE $0x3f \ // PSRLQ XMM3,0x3f /* reg = reg>>63 */ - LONG $0xef0f4166; BYTE $0xdf // PXOR XMM3,XMM15 /* ORed together: v6 = v6<<(64-63) | v6>>63, v7 = v7<<(64-63) | v7>>63 */ - -#define DIAGONALIZE \ - \ // DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); - MOVOU X6, X13 \ /* t0 = row4l;\ */ - MOVOU X2, X14 \ /* t1 = row2l;\ */ - MOVOU X4, X6 \ /* row4l = row3l;\ */ - MOVOU X5, X4 \ /* row3l = row3h;\ */ - MOVOU X6, X5 \ /* row3h = row4l;\ */ - LONG $0x6c0f4566; BYTE $0xfd \ // PUNPCKLQDQ XMM15, XMM13 /* _mm_unpacklo_epi64(t0, t0) */ - MOVOU X7, X6 \ - LONG $0x6d0f4166; BYTE $0xf7 \ // PUNPCKHQDQ XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4h, ); \ */ - LONG $0x6c0f4466; BYTE $0xff \ // PUNPCKLQDQ XMM15, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */ - MOVOU X13, X7 \ - LONG $0x6d0f4166; BYTE $0xff \ // PUNPCKHQDQ XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(t0, ); \ */ - LONG $0x6c0f4466; BYTE $0xfb \ // PUNPCKLQDQ XMM15, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */ - LONG $0x6d0f4166; BYTE $0xd7 \ // PUNPCKHQDQ XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2l, ); \ */ - LONG $0x6c0f4566; BYTE $0xfe \ // PUNPCKLQDQ XMM15, XMM14 /* _mm_unpacklo_epi64(t1, t1) */ - LONG $0x6d0f4166; BYTE $0xdf // PUNPCKHQDQ XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */ - -#define UNDIAGONALIZE \ - \ // UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); - MOVOU X4, X13 \ /* t0 = row3l;\ */ - MOVOU X5, X4 \ /* row3l = row3h;\ */ - MOVOU X13, X5 \ /* row3h = t0;\ */ - MOVOU X2, X13 \ /* t0 = row2l;\ */ - MOVOU X6, X14 \ /* t1 = row4l;\ */ - LONG $0x6c0f4466; BYTE $0xfa \ // PUNPCKLQDQ XMM15, XMM2 /* _mm_unpacklo_epi64(row2l, row2l) */ - MOVOU X3, X2 \ - LONG $0x6d0f4166; BYTE $0xd7 \ // PUNPCKHQDQ XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, ); \ */ - LONG $0x6c0f4466; BYTE $0xfb \ // PUNPCKLQDQ XMM15, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */ - MOVOU X13, X3 \ - LONG $0x6d0f4166; BYTE $0xdf \ // PUNPCKHQDQ XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(t0, ); \ */ - LONG $0x6c0f4466; BYTE $0xff \ // PUNPCKLQDQ XMM15, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */ - LONG $0x6d0f4166; BYTE $0xf7 \ // PUNPCKHQDQ XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, ); \ */ - LONG $0x6c0f4566; BYTE $0xfe \ // PUNPCKLQDQ XMM15, XMM14 /* _mm_unpacklo_epi64(t1, t1) */ - LONG $0x6d0f4166; BYTE $0xff // PUNPCKHQDQ XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, ) */ - -#define LOAD_SHUFFLE \ - \ // Load shuffle value - MOVQ shffle+120(FP), SI \ // SI: &shuffle - MOVOU 0(SI), X12 // X12 = 03040506 07000102 0b0c0d0e 0f08090a - -// func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64) -TEXT ·blockSSELoop(SB), 7, $0 - // REGISTER USE - // R8: loop counter - // DX: message pointer - // SI: temp pointer for loading - // X0 - X7: v0 - v15 - // X8 - X11: m[0] - m[7] - // X12: shuffle value - // X13 - X15: temp registers - - // Load digest - MOVQ in+24(FP), SI // SI: &in - MOVOU 0(SI), X0 // X0 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */ - MOVOU 16(SI), X1 // X1 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */ - MOVOU 32(SI), X2 // X2 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */ - MOVOU 48(SI), X3 // X3 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */ - - // Already store digest into &out (so we can reload it later generically) - MOVQ out+144(FP), SI // SI: &out - MOVOU X0, 0(SI) // out[0]+out[1] = X0 - MOVOU X1, 16(SI) // out[2]+out[3] = X1 - MOVOU X2, 32(SI) // out[4]+out[5] = X2 - MOVOU X3, 48(SI) // out[6]+out[7] = X3 - - // Initialize message pointer and loop counter - MOVQ message+0(FP), DX // DX: &p (message) - MOVQ message_len+8(FP), R8 // R8: len(message) - SHRQ $7, R8 // len(message) / 128 - CMPQ R8, $0 - JEQ complete - -loop: - // Increment counter - MOVQ t+72(FP), SI // SI: &t - MOVQ 0(SI), R9 - ADDQ $128, R9 // /* d.t[0] += BlockSize */ - MOVQ R9, 0(SI) - CMPQ R9, $128 // /* if d.t[0] < BlockSize { */ - JGE noincr - MOVQ 8(SI), R9 - ADDQ $1, R9 // /* d.t[1]++ */ - MOVQ R9, 8(SI) - -noincr: // /* } */ - - // Load initialization vector - MOVQ iv+48(FP), SI // SI: &iv - MOVOU 0(SI), X4 // X4 = iv[0]+iv[1] /* row3l = LOAD( &blake2b_IV[0] ); */ - MOVOU 16(SI), X5 // X5 = iv[2]+iv[3] /* row3h = LOAD( &blake2b_IV[2] ); */ - MOVOU 32(SI), X6 // X6 = iv[4]+iv[5] /* LOAD( &blake2b_IV[4] ) */ - MOVOU 48(SI), X7 // X7 = iv[6]+iv[7] /* LOAD( &blake2b_IV[6] ) */ - MOVQ t+72(FP), SI // SI: &t - MOVOU 0(SI), X8 // X8 = t[0]+t[1] /* LOAD( &S->t[0] ) */ - PXOR X8, X6 // X6 = X6 ^ X8 /* row4l = _mm_xor_si128( , ); */ - MOVQ t+96(FP), SI // SI: &f - MOVOU 0(SI), X8 // X8 = f[0]+f[1] /* LOAD( &S->f[0] ) */ - PXOR X8, X7 // X7 = X7 ^ X8 /* row4h = _mm_xor_si128( , ); */ - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+m[1] - MOVOU 16(DX), X13 // X13 = m[2]+m[3] - MOVOU 32(DX), X14 // X14 = m[4]+m[5] - MOVOU 48(DX), X15 // X15 = m[6]+m[7] - MOVOU X12, X8 - LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[2] */ - MOVOU X14, X9 - LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[4], m[6] */ - MOVOU X12, X10 - LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[1], m[3] */ - MOVOU X14, X11 - LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[5], m[7] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 64(DX), X12 // X12 = m[8]+ m[9] - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X12, X8 - LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[8],m[10] */ - MOVOU X14, X9 - LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[12],m[14] */ - MOVOU X12, X10 - LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[9],m[11] */ - MOVOU X14, X11 - LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[13],m[15] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 2 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 112(DX), X12 // X12 = m[14]+m[15] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 96(DX), X15 // X15 = m[12]+m[13] - MOVOU X12, X8 - LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[14], m[4] */ - MOVOU X14, X9 - LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[9], m[13] */ - MOVOU 80(DX), X10 // X10 = m[10]+m[11] - MOVOU 48(DX), X11 // X11 = m[6]+ m[7] - LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[10], m[8] */ - LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[15], m[6] */; ; ; ; ; - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU X12, X8 - LONG $0x3a0f4566; WORD $0xc40f; BYTE $0x08 // PALIGNR XMM8, XMM12, 0x8 /* m[1], m[0] */ - MOVOU X14, X9 - LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* m[11], m[5] */ - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X11 // X11 = m[6]+ m[7] - MOVOU 96(DX), X10 // X10 = m[12]+m[13] - LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[2] */ - LONG $0x6d0f4566; BYTE $0xdc // PUNPCKHQDQ XMM11, XMM12 /* m[7], m[3] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 3 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 32(DX), X12 // X12 = m[4]+ m[5] - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X14, X8 - LONG $0x3a0f4566; WORD $0xc50f; BYTE $0x08 // PALIGNR XMM8, XMM13, 0x8 /* m[11], m[12] */ - MOVOU X12, X9 - LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[5], m[15] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 16(DX), X13 // X13 = m[2]+ m[3] - MOVOU 64(DX), X10 // X10 = m[8]+ m[9] - LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[8], m[0] */ - LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[13] */ - MOVOU X13, X11 - LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[2], ___ */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - MOVOU X12, X9 - LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* ___, m[3] */ - MOVOU X15, X8 - LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[10], ___ */ - MOVOU X13, X9 - LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[7], m[9] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X11 // X11 = m[4]+ m[5] - MOVOU 112(DX), X10 // X10 = m[14]+m[15] - LONG $0x6c0f4566; BYTE $0xd5 // PUNPCKLQDQ XMM10, XMM13 /* m[14], m[6] */ - LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[1], m[4] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 4 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 96(DX), X15 // X15 = m[12]+m[13] - MOVOU X13, X8 - LONG $0x6d0f4566; BYTE $0xc4 // PUNPCKHQDQ XMM8, XMM12 /* m[7], m[3] */ - MOVOU X15, X9 - LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[13], m[11] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 64(DX), X10 // X10 = m[8]+ m[9] - MOVOU 112(DX), X14 // X14 = m[14]+m[15] - LONG $0x6d0f4566; BYTE $0xd4 // PUNPCKHQDQ XMM10, XMM12 /* m[9], m[1] */ - MOVOU X15, X11 - LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[12], m[14] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X13, X9 - LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* ___, m[5] */ - MOVOU X12, X8 - LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[2], ____ */ - MOVOU X15, X10 - LONG $0x6d0f4566; BYTE $0xd7 // PUNPCKHQDQ XMM10, XMM15 /* ___, m[15] */ - MOVOU X13, X9 - LONG $0x6c0f4566; BYTE $0xca // PUNPCKLQDQ XMM9, XMM10 /* m[4], ____ */ - MOVOU 0(DX), X11 // X11 = m[0]+ m[1] - MOVOU 48(DX), X10 // X10 = m[6]+ m[7] - MOVOU 64(DX), X15 // X15 = m[8]+ m[9] - LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[6], m[10] */ - LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[0], m[8] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 5 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - MOVOU X14, X8 - LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[9], m[5] */ - MOVOU X12, X9 - LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[2], m[10] */ - MOVOU 0(DX), X10 // X10 = m[0]+ m[1] - MOVOU 48(DX), X14 // X14 = m[6]+ m[7] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[7] */ - LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[0], ____ */ - LONG $0x6d0f4566; BYTE $0xff // PUNPCKHQDQ XMM15, XMM15 /* ___, m[15] */ - MOVOU X13, X11 - LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[4], ____ */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[11] */ - MOVOU X15, X8 - LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[14], ____ */ - LONG $0x6d0f4566; BYTE $0xe4 // PUNPCKHQDQ XMM12, XMM12 /* ___, m[3] */ - MOVOU X13, X9 - LONG $0x6c0f4566; BYTE $0xcc // PUNPCKLQDQ XMM9, XMM12 /* m[6], ____ */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 64(DX), X11 // X11 = m[8]+ m[9] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU X14, X10 - LONG $0x3a0f4566; WORD $0xd40f; BYTE $0x08 // PALIGNR XMM10, XMM12, 0x8 /* m[1], m[12] */ - LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[13] */ - LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[8], ____ */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 6 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 16(DX), X13 // X13 = m[2]+ m[3] - MOVOU 48(DX), X14 // X14 = m[6]+ m[7] - MOVOU 64(DX), X15 // X15 = m[8]+ m[9] - MOVOU X13, X8 - LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[2], m[6] */ - MOVOU X12, X9 - LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[0], m[8] */ - MOVOU 80(DX), X12 // X12 = m[10]+m[11] - MOVOU 96(DX), X10 // X10 = m[12]+m[13] - LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[10] */ - MOVOU X12, X11 - LONG $0x6d0f4566; BYTE $0xdd // PUNPCKHQDQ XMM11, XMM13 /* m[11], m[3] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 48(DX), X14 // X14 = m[6]+ m[7] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X14, X9 - LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* ___, m[7] */ - MOVOU X13, X8 - LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[4], ____ */ - MOVOU X15, X9 - LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* m[15], m[1] */ - MOVOU 64(DX), X12 // X12 = m[8]+ m[9] - MOVOU 96(DX), X10 // X10 = m[12]+m[13] - LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[13], m[5] */ - LONG $0x6d0f4566; BYTE $0xe4 // PUNPCKHQDQ XMM12, XMM12 /* ___, m[9] */ - MOVOU X15, X11 - LONG $0x6c0f4566; BYTE $0xdc // PUNPCKLQDQ XMM11, XMM12 /* m[14], ____ */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 7 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X12, X9 - LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* ___, m[1] */ - MOVOU X14, X8 - LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[12], ____ */ - MOVOU X15, X9 - LONG $0x6c0f4566; BYTE $0xcd // PUNPCKLQDQ XMM9, XMM13 /* m[14], m[4] */ - MOVOU 80(DX), X11 // X11 = m[10]+m[11] - MOVOU X13, X10 - LONG $0x6d0f4566; BYTE $0xd7 // PUNPCKHQDQ XMM10, XMM15 /* m[5], m[15] */ - LONG $0x3a0f4566; WORD $0xde0f; BYTE $0x08 // PALIGNR XMM11, XMM14, 0x8 /* m[13], m[10] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - MOVOU X12, X8 - LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[6] */ - MOVOU X14, X9 - LONG $0x3a0f4566; WORD $0xce0f; BYTE $0x08 // PALIGNR XMM9, XMM14, 0x8 /* m[9], m[8] */ - MOVOU 16(DX), X11 // X14 = m[2]+ m[3] - MOVOU X13, X10 - LONG $0x6d0f4566; BYTE $0xd3 // PUNPCKHQDQ XMM10, XMM11 /* m[7], m[3] */ - LONG $0x6d0f4566; BYTE $0xff // PUNPCKHQDQ XMM15, XMM15 /* ___, m[11] */ - LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[2], ____ */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 8 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X14, X8 - LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[13], m[7] */ - MOVOU X12, X10 - LONG $0x6d0f4566; BYTE $0xd4 // PUNPCKHQDQ XMM10, XMM12 /* ___, m[3] */ - MOVOU X14, X9 - LONG $0x6c0f4566; BYTE $0xca // PUNPCKLQDQ XMM9, XMM10 /* m[12], ____ */ - MOVOU 0(DX), X11 // X11 = m[0]+ m[1] - MOVOU 64(DX), X13 // X13 = m[8]+ m[9] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU X15, X10 - LONG $0x3a0f4566; WORD $0xd60f; BYTE $0x08 // PALIGNR XMM10, XMM14, 0x8 /* m[11], m[14] */ - LONG $0x6d0f4566; BYTE $0xdd // PUNPCKHQDQ XMM11, XMM13 /* m[1], m[9] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X13, X8 - LONG $0x6d0f4566; BYTE $0xc7 // PUNPCKHQDQ XMM8, XMM15 /* m[5], m[15] */ - MOVOU X14, X9 - LONG $0x6c0f4566; BYTE $0xcc // PUNPCKLQDQ XMM9, XMM12 /* m[8], m[2] */ - MOVOU 0(DX), X10 // X10 = m[0]+ m[1] - MOVOU 48(DX), X11 // X11 = m[6]+ m[7] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - LONG $0x6c0f4566; BYTE $0xd5 // PUNPCKLQDQ XMM10, XMM13 /* m[0], m[4] */ - LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[6], m[10] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 9 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X13, X8 - LONG $0x6c0f4566; BYTE $0xc7 // PUNPCKLQDQ XMM8, XMM15 /* m[6], m[14] */ - MOVOU X12, X9 - LONG $0x3a0f4566; WORD $0xce0f; BYTE $0x08 // PALIGNR XMM9, XMM14, 0x8 /* m[11], m[0] */ - MOVOU 16(DX), X13 // X13 = m[2]+ m[3] - MOVOU 64(DX), X11 // X11 = m[8]+ m[9] - MOVOU X15, X10 - LONG $0x6d0f4566; BYTE $0xd3 // PUNPCKHQDQ XMM10, XMM11 /* m[15], m[9] */ - LONG $0x3a0f4566; WORD $0xdd0f; BYTE $0x08 // PALIGNR XMM11, XMM13, 0x8 /* m[3], m[8] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 16(DX), X13 // X13 = m[2]+ m[3] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU 96(DX), X15 // X15 = m[12]+m[13] - MOVOU X15, X9 - LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* ___, m[13] */ - MOVOU X15, X8 - LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[12], ____ */ - MOVOU X14, X9 - LONG $0x3a0f4566; WORD $0xcc0f; BYTE $0x08 // PALIGNR XMM9, XMM12, 0x8 /* m[1], m[10] */ - MOVOU 32(DX), X12 // X12 = m[4]+ m[5] - MOVOU 48(DX), X15 // X15 = m[6]+ m[7] - MOVOU X15, X11 - LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* ___, m[7] */ - MOVOU X13, X10 - LONG $0x6c0f4566; BYTE $0xd3 // PUNPCKLQDQ XMM10, XMM11 /* m[2], ____ */ - MOVOU X12, X15 - LONG $0x6d0f4566; BYTE $0xfc // PUNPCKHQDQ XMM15, XMM12 /* ___, m[5] */ - MOVOU X12, X11 - LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[4], ____ */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 0 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 48(DX), X13 // X13 = m[6]+ m[7] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 80(DX), X15 // X15 = m[10]+m[11] - MOVOU X15, X8 - LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[10], m[8] */ - MOVOU X13, X9 - LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* m[7], m[1] */ - MOVOU 16(DX), X10 // X10 = m[2]+ m[3] - MOVOU 32(DX), X14 // X14 = m[4]+ m[5] - LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[2], m[4] */ - MOVOU X14, X15 - LONG $0x6d0f4566; BYTE $0xfe // PUNPCKHQDQ XMM15, XMM14 /* ___, m[5] */ - MOVOU X13, X11 - LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[6], ____ */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 64(DX), X13 // X13 = m[8]+ m[9] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X15, X8 - LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[15], m[9] */ - MOVOU X12, X9 - LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[3], m[13] */ - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU X15, X10 - LONG $0x3a0f4566; WORD $0xd50f; BYTE $0x08 // PALIGNR XMM10, XMM13, 0x8 /* m[11], m[14] */ - MOVOU X14, X11 - LONG $0x6c0f4566; BYTE $0xdc // PUNPCKLQDQ XMM11, XMM12 /* m[12], m[0] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 1 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+m[1] - MOVOU 16(DX), X13 // X13 = m[2]+m[3] - MOVOU 32(DX), X14 // X14 = m[4]+m[5] - MOVOU 48(DX), X15 // X15 = m[6]+m[7] - MOVOU X12, X8 - LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[2] */ - MOVOU X14, X9 - LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[4], m[6] */ - MOVOU X12, X10 - LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[1], m[3] */ - MOVOU X14, X11 - LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[5], m[7] */ - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 64(DX), X12 // X12 = m[8]+ m[9] - MOVOU 80(DX), X13 // X13 = m[10]+m[11] - MOVOU 96(DX), X14 // X14 = m[12]+m[13] - MOVOU 112(DX), X15 // X15 = m[14]+m[15] - MOVOU X12, X8 - LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[8],m[10] */ - MOVOU X14, X9 - LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[12],m[14] */ - MOVOU X12, X10 - LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[9],m[11] */ - MOVOU X14, X11 - LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[13],m[15] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - /////////////////////////////////////////////////////////////////////////// - // R O U N D 1 2 - /////////////////////////////////////////////////////////////////////////// - - // LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register) - MOVOU 112(DX), X12 // X12 = m[14]+m[15] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 64(DX), X14 // X14 = m[8]+ m[9] - MOVOU 96(DX), X15 // X15 = m[12]+m[13] - MOVOU X12, X8 - LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[14], m[4] */ - MOVOU X14, X9 - LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[9], m[13] */ - MOVOU 80(DX), X10 // X10 = m[10]+m[11] - MOVOU 48(DX), X11 // X11 = m[6]+ m[7] - LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[10], m[8] */ - LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[15], m[6] */; ; ; ; ; - - LOAD_SHUFFLE - G1 - G2 - DIAGONALIZE - - // LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register) - MOVOU 0(DX), X12 // X12 = m[0]+ m[1] - MOVOU 32(DX), X13 // X13 = m[4]+ m[5] - MOVOU 80(DX), X14 // X14 = m[10]+m[11] - MOVOU X12, X8 - LONG $0x3a0f4566; WORD $0xc40f; BYTE $0x08 // PALIGNR XMM8, XMM12, 0x8 /* m[1], m[0] */ - MOVOU X14, X9 - LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* m[11], m[5] */ - MOVOU 16(DX), X12 // X12 = m[2]+ m[3] - MOVOU 48(DX), X11 // X11 = m[6]+ m[7] - MOVOU 96(DX), X10 // X10 = m[12]+m[13] - LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[2] */ - LONG $0x6d0f4566; BYTE $0xdc // PUNPCKHQDQ XMM11, XMM12 /* m[7], m[3] */ - - LOAD_SHUFFLE - G1 - G2 - UNDIAGONALIZE - - // Reload digest (most current value store in &out) - MOVQ out+144(FP), SI // SI: &in - MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */ - MOVOU 16(SI), X13 // X13 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */ - MOVOU 32(SI), X14 // X14 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */ - MOVOU 48(SI), X15 // X15 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */ - - // Final computations and prepare for storing - PXOR X4, X0 // X0 = X0 ^ X4 /* row1l = _mm_xor_si128( row3l, row1l ); */ - PXOR X5, X1 // X1 = X1 ^ X5 /* row1h = _mm_xor_si128( row3h, row1h ); */ - PXOR X12, X0 // X0 = X0 ^ X12 /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */ - PXOR X13, X1 // X1 = X1 ^ X13 /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */ - PXOR X6, X2 // X2 = X2 ^ X6 /* row2l = _mm_xor_si128( row4l, row2l ); */ - PXOR X7, X3 // X3 = X3 ^ X7 /* row2h = _mm_xor_si128( row4h, row2h ); */ - PXOR X14, X2 // X2 = X2 ^ X14 /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */ - PXOR X15, X3 // X3 = X3 ^ X15 /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */ - - // Store digest into &out - MOVQ out+144(FP), SI // SI: &out - MOVOU X0, 0(SI) // out[0]+out[1] = X0 - MOVOU X1, 16(SI) // out[2]+out[3] = X1 - MOVOU X2, 32(SI) // out[4]+out[5] = X2 - MOVOU X3, 48(SI) // out[6]+out[7] = X3 - - // Increment message pointer and check if there's more to do - ADDQ $128, DX // message += 128 - SUBQ $1, R8 - JNZ loop - -complete: - RET diff --git a/vendor/github.com/minio/blake2b-simd/compress_amd64.go b/vendor/github.com/minio/blake2b-simd/compress_amd64.go deleted file mode 100644 index 4fc5e388c..000000000 --- a/vendor/github.com/minio/blake2b-simd/compress_amd64.go +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package blake2b - -func compress(d *digest, p []uint8) { - // Verifies if AVX2 or AVX is available, use optimized code path. - if avx2 { - compressAVX2(d, p) - } else if avx { - compressAVX(d, p) - } else if ssse3 { - compressSSE(d, p) - } else { - compressGeneric(d, p) - } -} diff --git a/vendor/github.com/minio/blake2b-simd/compress_generic.go b/vendor/github.com/minio/blake2b-simd/compress_generic.go deleted file mode 100644 index e9e16e8b9..000000000 --- a/vendor/github.com/minio/blake2b-simd/compress_generic.go +++ /dev/null @@ -1,1419 +0,0 @@ -// Written in 2012 by Dmitry Chestnykh. -// -// To the extent possible under law, the author have dedicated all copyright -// and related and neighboring rights to this software to the public domain -// worldwide. This software is distributed without any warranty. -// http://creativecommons.org/publicdomain/zero/1.0/ - -package blake2b - -func compressGeneric(d *digest, p []uint8) { - h0, h1, h2, h3, h4, h5, h6, h7 := d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] - - for len(p) >= BlockSize { - // Increment counter. - d.t[0] += BlockSize - if d.t[0] < BlockSize { - d.t[1]++ - } - // Initialize compression function. - v0, v1, v2, v3, v4, v5, v6, v7 := h0, h1, h2, h3, h4, h5, h6, h7 - v8 := iv[0] - v9 := iv[1] - v10 := iv[2] - v11 := iv[3] - v12 := iv[4] ^ d.t[0] - v13 := iv[5] ^ d.t[1] - v14 := iv[6] ^ d.f[0] - v15 := iv[7] ^ d.f[1] - - j := 0 - var m [16]uint64 - for i := range m { - m[i] = uint64(p[j]) | uint64(p[j+1])<<8 | uint64(p[j+2])<<16 | - uint64(p[j+3])<<24 | uint64(p[j+4])<<32 | uint64(p[j+5])<<40 | - uint64(p[j+6])<<48 | uint64(p[j+7])<<56 - j += 8 - } - - // Round 1. - v0 += m[0] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[2] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[4] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[6] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[5] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[7] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[3] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[1] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[8] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[10] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[12] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[14] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[13] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[15] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[11] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[9] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 2. - v0 += m[14] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[4] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[9] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[13] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[15] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[6] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[8] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[10] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[1] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[0] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[11] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[5] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[7] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[3] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[2] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[12] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 3. - v0 += m[11] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[12] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[5] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[15] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[2] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[13] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[0] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[8] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[10] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[3] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[7] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[9] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[1] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[4] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[6] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[14] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 4. - v0 += m[7] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[3] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[13] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[11] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[12] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[14] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[1] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[9] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[2] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[5] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[4] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[15] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[0] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[8] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[10] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[6] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 5. - v0 += m[9] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[5] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[2] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[10] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[4] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[15] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[7] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[0] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[14] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[11] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[6] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[3] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[8] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[13] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[12] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[1] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 6. - v0 += m[2] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[6] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[0] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[8] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[11] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[3] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[10] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[12] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[4] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[7] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[15] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[1] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[14] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[9] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[5] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[13] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 7. - v0 += m[12] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[1] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[14] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[4] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[13] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[10] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[15] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[5] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[0] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[6] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[9] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[8] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[2] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[11] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[3] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[7] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 8. - v0 += m[13] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[7] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[12] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[3] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[1] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[9] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[14] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[11] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[5] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[15] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[8] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[2] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[6] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[10] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[4] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[0] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 9. - v0 += m[6] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[14] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[11] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[0] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[3] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[8] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[9] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[15] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[12] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[13] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[1] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[10] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[4] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[5] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[7] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[2] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 10. - v0 += m[10] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[8] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[7] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[1] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[6] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[5] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[4] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[2] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[15] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[9] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[3] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[13] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[12] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[0] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[14] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[11] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 11. - v0 += m[0] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[2] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[4] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[6] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[5] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[7] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[3] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[1] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[8] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[10] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[12] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[14] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[13] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[15] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[11] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[9] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - // Round 12. - v0 += m[14] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-32) | v12>>32 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-24) | v4>>24 - v1 += m[4] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-32) | v13>>32 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-24) | v5>>24 - v2 += m[9] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-32) | v14>>32 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-24) | v6>>24 - v3 += m[13] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-32) | v15>>32 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-24) | v7>>24 - v2 += m[15] - v2 += v6 - v14 ^= v2 - v14 = v14<<(64-16) | v14>>16 - v10 += v14 - v6 ^= v10 - v6 = v6<<(64-63) | v6>>63 - v3 += m[6] - v3 += v7 - v15 ^= v3 - v15 = v15<<(64-16) | v15>>16 - v11 += v15 - v7 ^= v11 - v7 = v7<<(64-63) | v7>>63 - v1 += m[8] - v1 += v5 - v13 ^= v1 - v13 = v13<<(64-16) | v13>>16 - v9 += v13 - v5 ^= v9 - v5 = v5<<(64-63) | v5>>63 - v0 += m[10] - v0 += v4 - v12 ^= v0 - v12 = v12<<(64-16) | v12>>16 - v8 += v12 - v4 ^= v8 - v4 = v4<<(64-63) | v4>>63 - v0 += m[1] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-32) | v15>>32 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-24) | v5>>24 - v1 += m[0] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-32) | v12>>32 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-24) | v6>>24 - v2 += m[11] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-32) | v13>>32 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-24) | v7>>24 - v3 += m[5] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-32) | v14>>32 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-24) | v4>>24 - v2 += m[7] - v2 += v7 - v13 ^= v2 - v13 = v13<<(64-16) | v13>>16 - v8 += v13 - v7 ^= v8 - v7 = v7<<(64-63) | v7>>63 - v3 += m[3] - v3 += v4 - v14 ^= v3 - v14 = v14<<(64-16) | v14>>16 - v9 += v14 - v4 ^= v9 - v4 = v4<<(64-63) | v4>>63 - v1 += m[2] - v1 += v6 - v12 ^= v1 - v12 = v12<<(64-16) | v12>>16 - v11 += v12 - v6 ^= v11 - v6 = v6<<(64-63) | v6>>63 - v0 += m[12] - v0 += v5 - v15 ^= v0 - v15 = v15<<(64-16) | v15>>16 - v10 += v15 - v5 ^= v10 - v5 = v5<<(64-63) | v5>>63 - - h0 ^= v0 ^ v8 - h1 ^= v1 ^ v9 - h2 ^= v2 ^ v10 - h3 ^= v3 ^ v11 - h4 ^= v4 ^ v12 - h5 ^= v5 ^ v13 - h6 ^= v6 ^ v14 - h7 ^= v7 ^ v15 - - p = p[BlockSize:] - } - d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 -} diff --git a/vendor/github.com/minio/blake2b-simd/compress_noasm.go b/vendor/github.com/minio/blake2b-simd/compress_noasm.go deleted file mode 100644 index d3c675847..000000000 --- a/vendor/github.com/minio/blake2b-simd/compress_noasm.go +++ /dev/null @@ -1,23 +0,0 @@ -//+build !amd64 noasm appengine - -/* - * Minio Cloud Storage, (C) 2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package blake2b - -func compress(d *digest, p []uint8) { - compressGeneric(d, p) -} diff --git a/vendor/github.com/minio/blake2b-simd/cpuid.go b/vendor/github.com/minio/blake2b-simd/cpuid.go deleted file mode 100644 index a9f95508e..000000000 --- a/vendor/github.com/minio/blake2b-simd/cpuid.go +++ /dev/null @@ -1,60 +0,0 @@ -// +build 386,!gccgo amd64,!gccgo - -// Copyright 2016 Frank Wessels -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -package blake2b - -func cpuid(op uint32) (eax, ebx, ecx, edx uint32) -func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) -func xgetbv(index uint32) (eax, edx uint32) - -// True when SIMD instructions are available. -var avx2 = haveAVX2() -var avx = haveAVX() -var ssse3 = haveSSSE3() - -// haveAVX returns true when there is AVX support -func haveAVX() bool { - _, _, c, _ := cpuid(1) - - // Check XGETBV, OXSAVE and AVX bits - if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { - // Check for OS support - eax, _ := xgetbv(0) - return (eax & 0x6) == 0x6 - } - return false -} - -// haveAVX2 returns true when there is AVX2 support -func haveAVX2() bool { - mfi, _, _, _ := cpuid(0) - - // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. - if mfi >= 7 && haveAVX() { - _, ebx, _, _ := cpuidex(7, 0) - return (ebx & 0x00000020) != 0 - } - return false -} - -// haveSSSE3 returns true when there is SSSE3 support -func haveSSSE3() bool { - - _, _, c, _ := cpuid(1) - - return (c & 0x00000200) != 0 -} diff --git a/vendor/github.com/minio/blake2b-simd/cpuid_386.s b/vendor/github.com/minio/blake2b-simd/cpuid_386.s deleted file mode 100644 index fa38814ec..000000000 --- a/vendor/github.com/minio/blake2b-simd/cpuid_386.s +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. - -// +build 386,!gccgo - -// func cpuid(op uint32) (eax, ebx, ecx, edx uint32) -TEXT ·cpuid(SB), 7, $0 - XORL CX, CX - MOVL op+0(FP), AX - CPUID - MOVL AX, eax+4(FP) - MOVL BX, ebx+8(FP) - MOVL CX, ecx+12(FP) - MOVL DX, edx+16(FP) - RET - -// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) -TEXT ·cpuidex(SB), 7, $0 - MOVL op+0(FP), AX - MOVL op2+4(FP), CX - CPUID - MOVL AX, eax+8(FP) - MOVL BX, ebx+12(FP) - MOVL CX, ecx+16(FP) - MOVL DX, edx+20(FP) - RET - -// func xgetbv(index uint32) (eax, edx uint32) -TEXT ·xgetbv(SB), 7, $0 - MOVL index+0(FP), CX - BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV - MOVL AX, eax+4(FP) - MOVL DX, edx+8(FP) - RET diff --git a/vendor/github.com/minio/blake2b-simd/cpuid_amd64.s b/vendor/github.com/minio/blake2b-simd/cpuid_amd64.s deleted file mode 100644 index fb45a6560..000000000 --- a/vendor/github.com/minio/blake2b-simd/cpuid_amd64.s +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. - -// +build amd64,!gccgo - -// func cpuid(op uint32) (eax, ebx, ecx, edx uint32) -TEXT ·cpuid(SB), 7, $0 - XORQ CX, CX - MOVL op+0(FP), AX - CPUID - MOVL AX, eax+8(FP) - MOVL BX, ebx+12(FP) - MOVL CX, ecx+16(FP) - MOVL DX, edx+20(FP) - RET - - -// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) -TEXT ·cpuidex(SB), 7, $0 - MOVL op+0(FP), AX - MOVL op2+4(FP), CX - CPUID - MOVL AX, eax+8(FP) - MOVL BX, ebx+12(FP) - MOVL CX, ecx+16(FP) - MOVL DX, edx+20(FP) - RET - -// func xgetbv(index uint32) (eax, edx uint32) -TEXT ·xgetbv(SB), 7, $0 - MOVL index+0(FP), CX - BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV - MOVL AX, eax+8(FP) - MOVL DX, edx+12(FP) - RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b.go b/vendor/golang.org/x/crypto/blake2b/blake2b.go new file mode 100644 index 000000000..fa9e48e31 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b.go @@ -0,0 +1,194 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package blake2b implements the BLAKE2b hash algorithm as +// defined in RFC 7693. +package blake2b + +import ( + "encoding/binary" + "errors" + "hash" +) + +const ( + // The blocksize of BLAKE2b in bytes. + BlockSize = 128 + // The hash size of BLAKE2b-512 in bytes. + Size = 64 + // The hash size of BLAKE2b-384 in bytes. + Size384 = 48 + // The hash size of BLAKE2b-256 in bytes. + Size256 = 32 +) + +var ( + useAVX2 bool + useAVX bool + useSSE4 bool +) + +var errKeySize = errors.New("blake2b: invalid key size") + +var iv = [8]uint64{ + 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, +} + +// Sum512 returns the BLAKE2b-512 checksum of the data. +func Sum512(data []byte) [Size]byte { + var sum [Size]byte + checkSum(&sum, Size, data) + return sum +} + +// Sum384 returns the BLAKE2b-384 checksum of the data. +func Sum384(data []byte) [Size384]byte { + var sum [Size]byte + var sum384 [Size384]byte + checkSum(&sum, Size384, data) + copy(sum384[:], sum[:Size384]) + return sum384 +} + +// Sum256 returns the BLAKE2b-256 checksum of the data. +func Sum256(data []byte) [Size256]byte { + var sum [Size]byte + var sum256 [Size256]byte + checkSum(&sum, Size256, data) + copy(sum256[:], sum[:Size256]) + return sum256 +} + +// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil +// key turns the hash into a MAC. The key must between zero and 64 bytes long. +func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) } + +// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil +// key turns the hash into a MAC. The key must between zero and 64 bytes long. +func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) } + +// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil +// key turns the hash into a MAC. The key must between zero and 64 bytes long. +func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) } + +func newDigest(hashSize int, key []byte) (*digest, error) { + if len(key) > Size { + return nil, errKeySize + } + d := &digest{ + size: hashSize, + keyLen: len(key), + } + copy(d.key[:], key) + d.Reset() + return d, nil +} + +func checkSum(sum *[Size]byte, hashSize int, data []byte) { + h := iv + h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24) + var c [2]uint64 + + if length := len(data); length > BlockSize { + n := length &^ (BlockSize - 1) + if length == n { + n -= BlockSize + } + hashBlocks(&h, &c, 0, data[:n]) + data = data[n:] + } + + var block [BlockSize]byte + offset := copy(block[:], data) + remaining := uint64(BlockSize - offset) + if c[0] < remaining { + c[1]-- + } + c[0] -= remaining + + hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) + + for i, v := range h[:(hashSize+7)/8] { + binary.LittleEndian.PutUint64(sum[8*i:], v) + } +} + +type digest struct { + h [8]uint64 + c [2]uint64 + size int + block [BlockSize]byte + offset int + + key [BlockSize]byte + keyLen int +} + +func (d *digest) BlockSize() int { return BlockSize } + +func (d *digest) Size() int { return d.size } + +func (d *digest) Reset() { + d.h = iv + d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24) + d.offset, d.c[0], d.c[1] = 0, 0, 0 + if d.keyLen > 0 { + d.block = d.key + d.offset = BlockSize + } +} + +func (d *digest) Write(p []byte) (n int, err error) { + n = len(p) + + if d.offset > 0 { + remaining := BlockSize - d.offset + if n <= remaining { + d.offset += copy(d.block[d.offset:], p) + return + } + copy(d.block[d.offset:], p[:remaining]) + hashBlocks(&d.h, &d.c, 0, d.block[:]) + d.offset = 0 + p = p[remaining:] + } + + if length := len(p); length > BlockSize { + nn := length &^ (BlockSize - 1) + if length == nn { + nn -= BlockSize + } + hashBlocks(&d.h, &d.c, 0, p[:nn]) + p = p[nn:] + } + + if len(p) > 0 { + d.offset += copy(d.block[:], p) + } + + return +} + +func (d *digest) Sum(b []byte) []byte { + var block [BlockSize]byte + copy(block[:], d.block[:d.offset]) + remaining := uint64(BlockSize - d.offset) + + c := d.c + if c[0] < remaining { + c[1]-- + } + c[0] -= remaining + + h := d.h + hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) + + var sum [Size]byte + for i, v := range h[:(d.size+7)/8] { + binary.LittleEndian.PutUint64(sum[8*i:], v) + } + + return append(b, sum[:d.size]...) +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go new file mode 100644 index 000000000..8c41cf6c7 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go @@ -0,0 +1,43 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.7,amd64,!gccgo,!appengine + +package blake2b + +func init() { + useAVX2 = supportsAVX2() + useAVX = supportsAVX() + useSSE4 = supportsSSE4() +} + +//go:noescape +func supportsSSE4() bool + +//go:noescape +func supportsAVX() bool + +//go:noescape +func supportsAVX2() bool + +//go:noescape +func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) + +//go:noescape +func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) + +//go:noescape +func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) + +func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { + if useAVX2 { + hashBlocksAVX2(h, c, flag, blocks) + } else if useAVX { + hashBlocksAVX(h, c, flag, blocks) + } else if useSSE4 { + hashBlocksSSE4(h, c, flag, blocks) + } else { + hashBlocksGeneric(h, c, flag, blocks) + } +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s new file mode 100644 index 000000000..784bce6a9 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s @@ -0,0 +1,762 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.7,amd64,!gccgo,!appengine + +#include "textflag.h" + +DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b +DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 + +DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 +DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f +DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 + +DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b +DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 + +DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a +DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 + +DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b +GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 +DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 +DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 + +DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 +DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 + +#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 +#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 +#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e +#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 +#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 + +#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ + VPADDQ m0, Y0, Y0; \ + VPADDQ Y1, Y0, Y0; \ + VPXOR Y0, Y3, Y3; \ + VPSHUFD $-79, Y3, Y3; \ + VPADDQ Y3, Y2, Y2; \ + VPXOR Y2, Y1, Y1; \ + VPSHUFB c40, Y1, Y1; \ + VPADDQ m1, Y0, Y0; \ + VPADDQ Y1, Y0, Y0; \ + VPXOR Y0, Y3, Y3; \ + VPSHUFB c48, Y3, Y3; \ + VPADDQ Y3, Y2, Y2; \ + VPXOR Y2, Y1, Y1; \ + VPADDQ Y1, Y1, t; \ + VPSRLQ $63, Y1, Y1; \ + VPXOR t, Y1, Y1; \ + VPERMQ_0x39_Y1_Y1; \ + VPERMQ_0x4E_Y2_Y2; \ + VPERMQ_0x93_Y3_Y3; \ + VPADDQ m2, Y0, Y0; \ + VPADDQ Y1, Y0, Y0; \ + VPXOR Y0, Y3, Y3; \ + VPSHUFD $-79, Y3, Y3; \ + VPADDQ Y3, Y2, Y2; \ + VPXOR Y2, Y1, Y1; \ + VPSHUFB c40, Y1, Y1; \ + VPADDQ m3, Y0, Y0; \ + VPADDQ Y1, Y0, Y0; \ + VPXOR Y0, Y3, Y3; \ + VPSHUFB c48, Y3, Y3; \ + VPADDQ Y3, Y2, Y2; \ + VPXOR Y2, Y1, Y1; \ + VPADDQ Y1, Y1, t; \ + VPSRLQ $63, Y1, Y1; \ + VPXOR t, Y1, Y1; \ + VPERMQ_0x39_Y3_Y3; \ + VPERMQ_0x4E_Y2_Y2; \ + VPERMQ_0x93_Y1_Y1 + +#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E +#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26 +#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E +#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36 +#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E + +#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n +#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n +#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n +#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n +#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n + +#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01 +#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01 +#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01 +#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01 +#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01 + +#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01 +#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01 +#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01 +#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01 +#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01 + +#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8 +#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01 + +// load msg: Y12 = (i0, i1, i2, i3) +// i0, i1, i2, i3 must not be 0 +#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \ + VMOVQ_SI_X12(i0*8); \ + VMOVQ_SI_X11(i2*8); \ + VPINSRQ_1_SI_X12(i1*8); \ + VPINSRQ_1_SI_X11(i3*8); \ + VINSERTI128 $1, X11, Y12, Y12 + +// load msg: Y13 = (i0, i1, i2, i3) +// i0, i1, i2, i3 must not be 0 +#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \ + VMOVQ_SI_X13(i0*8); \ + VMOVQ_SI_X11(i2*8); \ + VPINSRQ_1_SI_X13(i1*8); \ + VPINSRQ_1_SI_X11(i3*8); \ + VINSERTI128 $1, X11, Y13, Y13 + +// load msg: Y14 = (i0, i1, i2, i3) +// i0, i1, i2, i3 must not be 0 +#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \ + VMOVQ_SI_X14(i0*8); \ + VMOVQ_SI_X11(i2*8); \ + VPINSRQ_1_SI_X14(i1*8); \ + VPINSRQ_1_SI_X11(i3*8); \ + VINSERTI128 $1, X11, Y14, Y14 + +// load msg: Y15 = (i0, i1, i2, i3) +// i0, i1, i2, i3 must not be 0 +#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \ + VMOVQ_SI_X15(i0*8); \ + VMOVQ_SI_X11(i2*8); \ + VPINSRQ_1_SI_X15(i1*8); \ + VPINSRQ_1_SI_X11(i3*8); \ + VINSERTI128 $1, X11, Y15, Y15 + +#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \ + VMOVQ_SI_X12_0; \ + VMOVQ_SI_X11(4*8); \ + VPINSRQ_1_SI_X12(2*8); \ + VPINSRQ_1_SI_X11(6*8); \ + VINSERTI128 $1, X11, Y12, Y12; \ + LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \ + LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \ + LOAD_MSG_AVX2_Y15(9, 11, 13, 15) + +#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \ + LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \ + LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \ + VMOVQ_SI_X11(11*8); \ + VPSHUFD $0x4E, 0*8(SI), X14; \ + VPINSRQ_1_SI_X11(5*8); \ + VINSERTI128 $1, X11, Y14, Y14; \ + LOAD_MSG_AVX2_Y15(12, 2, 7, 3) + +#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \ + VMOVQ_SI_X11(5*8); \ + VMOVDQU 11*8(SI), X12; \ + VPINSRQ_1_SI_X11(15*8); \ + VINSERTI128 $1, X11, Y12, Y12; \ + VMOVQ_SI_X13(8*8); \ + VMOVQ_SI_X11(2*8); \ + VPINSRQ_1_SI_X13_0; \ + VPINSRQ_1_SI_X11(13*8); \ + VINSERTI128 $1, X11, Y13, Y13; \ + LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \ + LOAD_MSG_AVX2_Y15(14, 6, 1, 4) + +#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \ + LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \ + LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \ + LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \ + VMOVQ_SI_X15(6*8); \ + VMOVQ_SI_X11_0; \ + VPINSRQ_1_SI_X15(10*8); \ + VPINSRQ_1_SI_X11(8*8); \ + VINSERTI128 $1, X11, Y15, Y15 + +#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \ + LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \ + VMOVQ_SI_X13_0; \ + VMOVQ_SI_X11(4*8); \ + VPINSRQ_1_SI_X13(7*8); \ + VPINSRQ_1_SI_X11(15*8); \ + VINSERTI128 $1, X11, Y13, Y13; \ + LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \ + LOAD_MSG_AVX2_Y15(1, 12, 8, 13) + +#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \ + VMOVQ_SI_X12(2*8); \ + VMOVQ_SI_X11_0; \ + VPINSRQ_1_SI_X12(6*8); \ + VPINSRQ_1_SI_X11(8*8); \ + VINSERTI128 $1, X11, Y12, Y12; \ + LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \ + LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \ + LOAD_MSG_AVX2_Y15(13, 5, 14, 9) + +#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \ + LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \ + LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \ + VMOVQ_SI_X14_0; \ + VPSHUFD $0x4E, 8*8(SI), X11; \ + VPINSRQ_1_SI_X14(6*8); \ + VINSERTI128 $1, X11, Y14, Y14; \ + LOAD_MSG_AVX2_Y15(7, 3, 2, 11) + +#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \ + LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \ + LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \ + LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \ + VMOVQ_SI_X15_0; \ + VMOVQ_SI_X11(6*8); \ + VPINSRQ_1_SI_X15(4*8); \ + VPINSRQ_1_SI_X11(10*8); \ + VINSERTI128 $1, X11, Y15, Y15 + +#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \ + VMOVQ_SI_X12(6*8); \ + VMOVQ_SI_X11(11*8); \ + VPINSRQ_1_SI_X12(14*8); \ + VPINSRQ_1_SI_X11_0; \ + VINSERTI128 $1, X11, Y12, Y12; \ + LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \ + VMOVQ_SI_X11(1*8); \ + VMOVDQU 12*8(SI), X14; \ + VPINSRQ_1_SI_X11(10*8); \ + VINSERTI128 $1, X11, Y14, Y14; \ + VMOVQ_SI_X15(2*8); \ + VMOVDQU 4*8(SI), X11; \ + VPINSRQ_1_SI_X15(7*8); \ + VINSERTI128 $1, X11, Y15, Y15 + +#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \ + LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \ + VMOVQ_SI_X13(2*8); \ + VPSHUFD $0x4E, 5*8(SI), X11; \ + VPINSRQ_1_SI_X13(4*8); \ + VINSERTI128 $1, X11, Y13, Y13; \ + LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \ + VMOVQ_SI_X15(11*8); \ + VMOVQ_SI_X11(12*8); \ + VPINSRQ_1_SI_X15(14*8); \ + VPINSRQ_1_SI_X11_0; \ + VINSERTI128 $1, X11, Y15, Y15 + +// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) +TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + + MOVQ SP, DX + MOVQ SP, R9 + ADDQ $31, R9 + ANDQ $~31, R9 + MOVQ R9, SP + + MOVQ CX, 16(SP) + XORQ CX, CX + MOVQ CX, 24(SP) + + VMOVDQU ·AVX2_c40<>(SB), Y4 + VMOVDQU ·AVX2_c48<>(SB), Y5 + + VMOVDQU 0(AX), Y8 + VMOVDQU 32(AX), Y9 + VMOVDQU ·AVX2_iv0<>(SB), Y6 + VMOVDQU ·AVX2_iv1<>(SB), Y7 + + MOVQ 0(BX), R8 + MOVQ 8(BX), R9 + MOVQ R9, 8(SP) + +loop: + ADDQ $128, R8 + MOVQ R8, 0(SP) + CMPQ R8, $128 + JGE noinc + INCQ R9 + MOVQ R9, 8(SP) + +noinc: + VMOVDQA Y8, Y0 + VMOVDQA Y9, Y1 + VMOVDQA Y6, Y2 + VPXOR 0(SP), Y7, Y3 + + LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() + VMOVDQA Y12, 32(SP) + VMOVDQA Y13, 64(SP) + VMOVDQA Y14, 96(SP) + VMOVDQA Y15, 128(SP) + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() + VMOVDQA Y12, 160(SP) + VMOVDQA Y13, 192(SP) + VMOVDQA Y14, 224(SP) + VMOVDQA Y15, 256(SP) + + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() + ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) + + ROUND_AVX2(32(SP), 64(SP), 96(SP), 128(SP), Y10, Y4, Y5) + ROUND_AVX2(160(SP), 192(SP), 224(SP), 256(SP), Y10, Y4, Y5) + + VPXOR Y0, Y8, Y8 + VPXOR Y1, Y9, Y9 + VPXOR Y2, Y8, Y8 + VPXOR Y3, Y9, Y9 + + LEAQ 128(SI), SI + SUBQ $128, DI + JNE loop + + MOVQ R8, 0(BX) + MOVQ R9, 8(BX) + + VMOVDQU Y8, 0(AX) + VMOVDQU Y9, 32(AX) + VZEROUPPER + + MOVQ DX, SP + RET + +#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA +#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB +#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF +#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD +#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE + +#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7 +#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF +#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7 +#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF +#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7 +#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7 +#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF +#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF + +#define SHUFFLE_AVX() \ + VMOVDQA X6, X13; \ + VMOVDQA X2, X14; \ + VMOVDQA X4, X6; \ + VPUNPCKLQDQ_X13_X13_X15; \ + VMOVDQA X5, X4; \ + VMOVDQA X6, X5; \ + VPUNPCKHQDQ_X15_X7_X6; \ + VPUNPCKLQDQ_X7_X7_X15; \ + VPUNPCKHQDQ_X15_X13_X7; \ + VPUNPCKLQDQ_X3_X3_X15; \ + VPUNPCKHQDQ_X15_X2_X2; \ + VPUNPCKLQDQ_X14_X14_X15; \ + VPUNPCKHQDQ_X15_X3_X3; \ + +#define SHUFFLE_AVX_INV() \ + VMOVDQA X2, X13; \ + VMOVDQA X4, X14; \ + VPUNPCKLQDQ_X2_X2_X15; \ + VMOVDQA X5, X4; \ + VPUNPCKHQDQ_X15_X3_X2; \ + VMOVDQA X14, X5; \ + VPUNPCKLQDQ_X3_X3_X15; \ + VMOVDQA X6, X14; \ + VPUNPCKHQDQ_X15_X13_X3; \ + VPUNPCKLQDQ_X7_X7_X15; \ + VPUNPCKHQDQ_X15_X6_X6; \ + VPUNPCKLQDQ_X14_X14_X15; \ + VPUNPCKHQDQ_X15_X7_X7; \ + +#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ + VPADDQ m0, v0, v0; \ + VPADDQ v2, v0, v0; \ + VPADDQ m1, v1, v1; \ + VPADDQ v3, v1, v1; \ + VPXOR v0, v6, v6; \ + VPXOR v1, v7, v7; \ + VPSHUFD $-79, v6, v6; \ + VPSHUFD $-79, v7, v7; \ + VPADDQ v6, v4, v4; \ + VPADDQ v7, v5, v5; \ + VPXOR v4, v2, v2; \ + VPXOR v5, v3, v3; \ + VPSHUFB c40, v2, v2; \ + VPSHUFB c40, v3, v3; \ + VPADDQ m2, v0, v0; \ + VPADDQ v2, v0, v0; \ + VPADDQ m3, v1, v1; \ + VPADDQ v3, v1, v1; \ + VPXOR v0, v6, v6; \ + VPXOR v1, v7, v7; \ + VPSHUFB c48, v6, v6; \ + VPSHUFB c48, v7, v7; \ + VPADDQ v6, v4, v4; \ + VPADDQ v7, v5, v5; \ + VPXOR v4, v2, v2; \ + VPXOR v5, v3, v3; \ + VPADDQ v2, v2, t0; \ + VPSRLQ $63, v2, v2; \ + VPXOR t0, v2, v2; \ + VPADDQ v3, v3, t0; \ + VPSRLQ $63, v3, v3; \ + VPXOR t0, v3, v3 + +// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7) +// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0 +#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \ + VMOVQ_SI_X12(i0*8); \ + VMOVQ_SI_X13(i2*8); \ + VMOVQ_SI_X14(i4*8); \ + VMOVQ_SI_X15(i6*8); \ + VPINSRQ_1_SI_X12(i1*8); \ + VPINSRQ_1_SI_X13(i3*8); \ + VPINSRQ_1_SI_X14(i5*8); \ + VPINSRQ_1_SI_X15(i7*8) + +// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7) +#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \ + VMOVQ_SI_X12_0; \ + VMOVQ_SI_X13(4*8); \ + VMOVQ_SI_X14(1*8); \ + VMOVQ_SI_X15(5*8); \ + VPINSRQ_1_SI_X12(2*8); \ + VPINSRQ_1_SI_X13(6*8); \ + VPINSRQ_1_SI_X14(3*8); \ + VPINSRQ_1_SI_X15(7*8) + +// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3) +#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \ + VPSHUFD $0x4E, 0*8(SI), X12; \ + VMOVQ_SI_X13(11*8); \ + VMOVQ_SI_X14(12*8); \ + VMOVQ_SI_X15(7*8); \ + VPINSRQ_1_SI_X13(5*8); \ + VPINSRQ_1_SI_X14(2*8); \ + VPINSRQ_1_SI_X15(3*8) + +// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13) +#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \ + VMOVDQU 11*8(SI), X12; \ + VMOVQ_SI_X13(5*8); \ + VMOVQ_SI_X14(8*8); \ + VMOVQ_SI_X15(2*8); \ + VPINSRQ_1_SI_X13(15*8); \ + VPINSRQ_1_SI_X14_0; \ + VPINSRQ_1_SI_X15(13*8) + +// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8) +#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \ + VMOVQ_SI_X12(2*8); \ + VMOVQ_SI_X13(4*8); \ + VMOVQ_SI_X14(6*8); \ + VMOVQ_SI_X15_0; \ + VPINSRQ_1_SI_X12(5*8); \ + VPINSRQ_1_SI_X13(15*8); \ + VPINSRQ_1_SI_X14(10*8); \ + VPINSRQ_1_SI_X15(8*8) + +// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15) +#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \ + VMOVQ_SI_X12(9*8); \ + VMOVQ_SI_X13(2*8); \ + VMOVQ_SI_X14_0; \ + VMOVQ_SI_X15(4*8); \ + VPINSRQ_1_SI_X12(5*8); \ + VPINSRQ_1_SI_X13(10*8); \ + VPINSRQ_1_SI_X14(7*8); \ + VPINSRQ_1_SI_X15(15*8) + +// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3) +#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \ + VMOVQ_SI_X12(2*8); \ + VMOVQ_SI_X13_0; \ + VMOVQ_SI_X14(12*8); \ + VMOVQ_SI_X15(11*8); \ + VPINSRQ_1_SI_X12(6*8); \ + VPINSRQ_1_SI_X13(8*8); \ + VPINSRQ_1_SI_X14(10*8); \ + VPINSRQ_1_SI_X15(3*8) + +// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11) +#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \ + MOVQ 0*8(SI), X12; \ + VPSHUFD $0x4E, 8*8(SI), X13; \ + MOVQ 7*8(SI), X14; \ + MOVQ 2*8(SI), X15; \ + VPINSRQ_1_SI_X12(6*8); \ + VPINSRQ_1_SI_X14(3*8); \ + VPINSRQ_1_SI_X15(11*8) + +// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8) +#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \ + MOVQ 6*8(SI), X12; \ + MOVQ 11*8(SI), X13; \ + MOVQ 15*8(SI), X14; \ + MOVQ 3*8(SI), X15; \ + VPINSRQ_1_SI_X12(14*8); \ + VPINSRQ_1_SI_X13_0; \ + VPINSRQ_1_SI_X14(9*8); \ + VPINSRQ_1_SI_X15(8*8) + +// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10) +#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \ + MOVQ 5*8(SI), X12; \ + MOVQ 8*8(SI), X13; \ + MOVQ 0*8(SI), X14; \ + MOVQ 6*8(SI), X15; \ + VPINSRQ_1_SI_X12(15*8); \ + VPINSRQ_1_SI_X13(2*8); \ + VPINSRQ_1_SI_X14(4*8); \ + VPINSRQ_1_SI_X15(10*8) + +// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5) +#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \ + VMOVDQU 12*8(SI), X12; \ + MOVQ 1*8(SI), X13; \ + MOVQ 2*8(SI), X14; \ + VPINSRQ_1_SI_X13(10*8); \ + VPINSRQ_1_SI_X14(7*8); \ + VMOVDQU 4*8(SI), X15 + +// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0) +#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \ + MOVQ 15*8(SI), X12; \ + MOVQ 3*8(SI), X13; \ + MOVQ 11*8(SI), X14; \ + MOVQ 12*8(SI), X15; \ + VPINSRQ_1_SI_X12(9*8); \ + VPINSRQ_1_SI_X13(13*8); \ + VPINSRQ_1_SI_X14(14*8); \ + VPINSRQ_1_SI_X15_0 + +// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) +TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + + MOVQ SP, BP + MOVQ SP, R9 + ADDQ $15, R9 + ANDQ $~15, R9 + MOVQ R9, SP + + VMOVDQU ·AVX_c40<>(SB), X0 + VMOVDQU ·AVX_c48<>(SB), X1 + VMOVDQA X0, X8 + VMOVDQA X1, X9 + + VMOVDQU ·AVX_iv3<>(SB), X0 + VMOVDQA X0, 0(SP) + XORQ CX, 0(SP) // 0(SP) = ·AVX_iv3 ^ (CX || 0) + + VMOVDQU 0(AX), X10 + VMOVDQU 16(AX), X11 + VMOVDQU 32(AX), X2 + VMOVDQU 48(AX), X3 + + MOVQ 0(BX), R8 + MOVQ 8(BX), R9 + +loop: + ADDQ $128, R8 + CMPQ R8, $128 + JGE noinc + INCQ R9 + +noinc: + VMOVQ_R8_X15 + VPINSRQ_1_R9_X15 + + VMOVDQA X10, X0 + VMOVDQA X11, X1 + VMOVDQU ·AVX_iv0<>(SB), X4 + VMOVDQU ·AVX_iv1<>(SB), X5 + VMOVDQU ·AVX_iv2<>(SB), X6 + + VPXOR X15, X6, X6 + VMOVDQA 0(SP), X7 + + LOAD_MSG_AVX_0_2_4_6_1_3_5_7() + VMOVDQA X12, 16(SP) + VMOVDQA X13, 32(SP) + VMOVDQA X14, 48(SP) + VMOVDQA X15, 64(SP) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15) + VMOVDQA X12, 80(SP) + VMOVDQA X13, 96(SP) + VMOVDQA X14, 112(SP) + VMOVDQA X15, 128(SP) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6) + VMOVDQA X12, 144(SP) + VMOVDQA X13, 160(SP) + VMOVDQA X14, 176(SP) + VMOVDQA X15, 192(SP) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_1_0_11_5_12_2_7_3() + VMOVDQA X12, 208(SP) + VMOVDQA X13, 224(SP) + VMOVDQA X14, 240(SP) + VMOVDQA X15, 256(SP) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX_11_12_5_15_8_0_2_13() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_2_5_4_15_6_10_0_8() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX_9_5_2_10_0_7_4_15() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX_2_6_0_8_12_10_11_3() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_0_6_9_8_7_3_2_11() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_5_15_8_2_0_4_6_10() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX_6_14_11_0_15_9_3_8() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_12_13_1_10_2_7_4_5() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5) + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX() + LOAD_MSG_AVX_15_9_3_13_11_14_12_0() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) + SHUFFLE_AVX_INV() + + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X15, X8, X9) + SHUFFLE_AVX() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X15, X8, X9) + SHUFFLE_AVX_INV() + + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X15, X8, X9) + SHUFFLE_AVX() + HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X15, X8, X9) + SHUFFLE_AVX_INV() + + VMOVDQU 32(AX), X14 + VMOVDQU 48(AX), X15 + VPXOR X0, X10, X10 + VPXOR X1, X11, X11 + VPXOR X2, X14, X14 + VPXOR X3, X15, X15 + VPXOR X4, X10, X10 + VPXOR X5, X11, X11 + VPXOR X6, X14, X2 + VPXOR X7, X15, X3 + VMOVDQU X2, 32(AX) + VMOVDQU X3, 48(AX) + + LEAQ 128(SI), SI + SUBQ $128, DI + JNE loop + + VMOVDQU X10, 0(AX) + VMOVDQU X11, 16(AX) + + MOVQ R8, 0(BX) + MOVQ R9, 8(BX) + VZEROUPPER + + MOVQ BP, SP + RET + +// func supportsAVX2() bool +TEXT ·supportsAVX2(SB), 4, $0-1 + MOVQ runtime·support_avx2(SB), AX + MOVB AX, ret+0(FP) + RET + +// func supportsAVX() bool +TEXT ·supportsAVX(SB), 4, $0-1 + MOVQ runtime·support_avx(SB), AX + MOVB AX, ret+0(FP) + RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go new file mode 100644 index 000000000..2ab7c30fc --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go @@ -0,0 +1,25 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !go1.7,amd64,!gccgo,!appengine + +package blake2b + +func init() { + useSSE4 = supportsSSE4() +} + +//go:noescape +func supportsSSE4() bool + +//go:noescape +func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) + +func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { + if useSSE4 { + hashBlocksSSE4(h, c, flag, blocks) + } else { + hashBlocksGeneric(h, c, flag, blocks) + } +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s new file mode 100644 index 000000000..64530740b --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s @@ -0,0 +1,290 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +#include "textflag.h" + +DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 +DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b +GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16 + +DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b +DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16 + +DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1 +DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16 + +DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b +DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 +GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16 + +DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 +DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 + +DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 +DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 + +#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ + MOVO v4, t1; \ + MOVO v5, v4; \ + MOVO t1, v5; \ + MOVO v6, t1; \ + PUNPCKLQDQ v6, t2; \ + PUNPCKHQDQ v7, v6; \ + PUNPCKHQDQ t2, v6; \ + PUNPCKLQDQ v7, t2; \ + MOVO t1, v7; \ + MOVO v2, t1; \ + PUNPCKHQDQ t2, v7; \ + PUNPCKLQDQ v3, t2; \ + PUNPCKHQDQ t2, v2; \ + PUNPCKLQDQ t1, t2; \ + PUNPCKHQDQ t2, v3 + +#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ + MOVO v4, t1; \ + MOVO v5, v4; \ + MOVO t1, v5; \ + MOVO v2, t1; \ + PUNPCKLQDQ v2, t2; \ + PUNPCKHQDQ v3, v2; \ + PUNPCKHQDQ t2, v2; \ + PUNPCKLQDQ v3, t2; \ + MOVO t1, v3; \ + MOVO v6, t1; \ + PUNPCKHQDQ t2, v3; \ + PUNPCKLQDQ v7, t2; \ + PUNPCKHQDQ t2, v6; \ + PUNPCKLQDQ t1, t2; \ + PUNPCKHQDQ t2, v7 + +#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ + PADDQ m0, v0; \ + PADDQ m1, v1; \ + PADDQ v2, v0; \ + PADDQ v3, v1; \ + PXOR v0, v6; \ + PXOR v1, v7; \ + PSHUFD $0xB1, v6, v6; \ + PSHUFD $0xB1, v7, v7; \ + PADDQ v6, v4; \ + PADDQ v7, v5; \ + PXOR v4, v2; \ + PXOR v5, v3; \ + PSHUFB c40, v2; \ + PSHUFB c40, v3; \ + PADDQ m2, v0; \ + PADDQ m3, v1; \ + PADDQ v2, v0; \ + PADDQ v3, v1; \ + PXOR v0, v6; \ + PXOR v1, v7; \ + PSHUFB c48, v6; \ + PSHUFB c48, v7; \ + PADDQ v6, v4; \ + PADDQ v7, v5; \ + PXOR v4, v2; \ + PXOR v5, v3; \ + MOVOU v2, t0; \ + PADDQ v2, t0; \ + PSRLQ $63, v2; \ + PXOR t0, v2; \ + MOVOU v3, t0; \ + PADDQ v3, t0; \ + PSRLQ $63, v3; \ + PXOR t0, v3 + +#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \ + MOVQ i0*8(src), m0; \ + PINSRQ $1, i1*8(src), m0; \ + MOVQ i2*8(src), m1; \ + PINSRQ $1, i3*8(src), m1; \ + MOVQ i4*8(src), m2; \ + PINSRQ $1, i5*8(src), m2; \ + MOVQ i6*8(src), m3; \ + PINSRQ $1, i7*8(src), m3 + +// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) +TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + + MOVQ SP, BP + MOVQ SP, R9 + ADDQ $15, R9 + ANDQ $~15, R9 + MOVQ R9, SP + + MOVOU ·iv3<>(SB), X0 + MOVO X0, 0(SP) + XORQ CX, 0(SP) // 0(SP) = ·iv3 ^ (CX || 0) + + MOVOU ·c40<>(SB), X13 + MOVOU ·c48<>(SB), X14 + + MOVOU 0(AX), X12 + MOVOU 16(AX), X15 + + MOVQ 0(BX), R8 + MOVQ 8(BX), R9 + +loop: + ADDQ $128, R8 + CMPQ R8, $128 + JGE noinc + INCQ R9 + +noinc: + MOVQ R8, X8 + PINSRQ $1, R9, X8 + + MOVO X12, X0 + MOVO X15, X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU ·iv0<>(SB), X4 + MOVOU ·iv1<>(SB), X5 + MOVOU ·iv2<>(SB), X6 + + PXOR X8, X6 + MOVO 0(SP), X7 + + LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7) + MOVO X8, 16(SP) + MOVO X9, 32(SP) + MOVO X10, 48(SP) + MOVO X11, 64(SP) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15) + MOVO X8, 80(SP) + MOVO X9, 96(SP) + MOVO X10, 112(SP) + MOVO X11, 128(SP) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6) + MOVO X8, 144(SP) + MOVO X9, 160(SP) + MOVO X10, 176(SP) + MOVO X11, 192(SP) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3) + MOVO X8, 208(SP) + MOVO X9, 224(SP) + MOVO X10, 240(SP) + MOVO X11, 256(SP) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14) + SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) + HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14) + SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + + MOVOU 32(AX), X10 + MOVOU 48(AX), X11 + PXOR X0, X12 + PXOR X1, X15 + PXOR X2, X10 + PXOR X3, X11 + PXOR X4, X12 + PXOR X5, X15 + PXOR X6, X10 + PXOR X7, X11 + MOVOU X10, 32(AX) + MOVOU X11, 48(AX) + + LEAQ 128(SI), SI + SUBQ $128, DI + JNE loop + + MOVOU X12, 0(AX) + MOVOU X15, 16(AX) + + MOVQ R8, 0(BX) + MOVQ R9, 8(BX) + + MOVQ BP, SP + RET + +// func supportsSSE4() bool +TEXT ·supportsSSE4(SB), 4, $0-1 + MOVL $1, AX + CPUID + SHRL $19, CX // Bit 19 indicates SSE4 support + ANDL $1, CX // CX != 0 if support SSE4 + MOVB CX, ret+0(FP) + RET diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go new file mode 100644 index 000000000..4bd2abc91 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go @@ -0,0 +1,179 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blake2b + +import "encoding/binary" + +// the precomputed values for BLAKE2b +// there are 12 16-byte arrays - one for each round +// the entries are calculated from the sigma constants. +var precomputed = [12][16]byte{ + {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, + {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, + {11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4}, + {7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8}, + {9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13}, + {2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9}, + {12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11}, + {13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10}, + {6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5}, + {10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0}, + {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first + {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second +} + +func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { + var m [16]uint64 + c0, c1 := c[0], c[1] + + for i := 0; i < len(blocks); { + c0 += BlockSize + if c0 < BlockSize { + c1++ + } + + v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] + v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7] + v12 ^= c0 + v13 ^= c1 + v14 ^= flag + + for j := range m { + m[j] = binary.LittleEndian.Uint64(blocks[i:]) + i += 8 + } + + for j := range precomputed { + s := &(precomputed[j]) + + v0 += m[s[0]] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[s[1]] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[s[2]] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[s[3]] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + + v0 += m[s[4]] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v1 += m[s[5]] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v2 += m[s[6]] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[s[7]] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + + v0 += m[s[8]] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[s[9]] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[s[10]] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[s[11]] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + + v0 += m[s[12]] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + v1 += m[s[13]] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v2 += m[s[14]] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[s[15]] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + + } + + h[0] ^= v0 ^ v8 + h[1] ^= v1 ^ v9 + h[2] ^= v2 ^ v10 + h[3] ^= v3 ^ v11 + h[4] ^= v4 ^ v12 + h[5] ^= v5 ^ v13 + h[6] ^= v6 ^ v14 + h[7] ^= v7 ^ v15 + } + c[0], c[1] = c0, c1 +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go new file mode 100644 index 000000000..da156a1ba --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go @@ -0,0 +1,11 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine gccgo + +package blake2b + +func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { + hashBlocksGeneric(h, c, flag, blocks) +} diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_test.go b/vendor/golang.org/x/crypto/blake2b/blake2b_test.go new file mode 100644 index 000000000..a38fceb20 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_test.go @@ -0,0 +1,448 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blake2b + +import ( + "bytes" + "encoding/hex" + "fmt" + "hash" + "testing" +) + +func fromHex(s string) []byte { + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +func TestHashes(t *testing.T) { + defer func(sse4, avx, avx2 bool) { + useSSE4, useAVX, useAVX2 = sse4, useAVX, avx2 + }(useSSE4, useAVX, useAVX2) + + if useAVX2 { + t.Log("AVX2 version") + testHashes(t) + useAVX2 = false + } + if useAVX { + t.Log("AVX version") + testHashes(t) + useAVX = false + } + if useSSE4 { + t.Log("SSE4 version") + testHashes(t) + useSSE4 = false + } + t.Log("generic version") + testHashes(t) +} + +func testHashes(t *testing.T) { + key, _ := hex.DecodeString("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f") + + input := make([]byte, 255) + for i := range input { + input[i] = byte(i) + } + + for i, expectedHex := range hashes { + h, err := New512(key) + if err != nil { + t.Fatalf("#%d: error from New512: %v", i, err) + } + + h.Write(input[:i]) + sum := h.Sum(nil) + + if gotHex := fmt.Sprintf("%x", sum); gotHex != expectedHex { + t.Fatalf("#%d (single write): got %s, wanted %s", i, gotHex, expectedHex) + } + + h.Reset() + for j := 0; j < i; j++ { + h.Write(input[j : j+1]) + } + + sum = h.Sum(sum[:0]) + if gotHex := fmt.Sprintf("%x", sum); gotHex != expectedHex { + t.Fatalf("#%d (byte-by-byte): got %s, wanted %s", i, gotHex, expectedHex) + } + } +} + +func generateSequence(out []byte, seed uint32) { + a := 0xDEAD4BAD * seed // prime + b := uint32(1) + + for i := range out { // fill the buf + a, b = b, a+b + out[i] = byte(b >> 24) + } +} + +func computeMAC(msg []byte, hashSize int, key []byte) (sum []byte) { + var h hash.Hash + switch hashSize { + case Size: + h, _ = New512(key) + case Size384: + h, _ = New384(key) + case Size256: + h, _ = New256(key) + case 20: + h, _ = newDigest(20, key) + default: + panic("unexpected hashSize") + } + + h.Write(msg) + return h.Sum(sum) +} + +func computeHash(msg []byte, hashSize int) (sum []byte) { + switch hashSize { + case Size: + hash := Sum512(msg) + return hash[:] + case Size384: + hash := Sum384(msg) + return hash[:] + case Size256: + hash := Sum256(msg) + return hash[:] + case 20: + var hash [64]byte + checkSum(&hash, 20, msg) + return hash[:20] + default: + panic("unexpected hashSize") + } +} + +// Test function from RFC 7693. +func TestSelfTest(t *testing.T) { + hashLens := [4]int{20, 32, 48, 64} + msgLens := [6]int{0, 3, 128, 129, 255, 1024} + + msg := make([]byte, 1024) + key := make([]byte, 64) + + h, _ := New256(nil) + for _, hashSize := range hashLens { + for _, msgLength := range msgLens { + generateSequence(msg[:msgLength], uint32(msgLength)) // unkeyed hash + + md := computeHash(msg[:msgLength], hashSize) + h.Write(md) + + generateSequence(key[:], uint32(hashSize)) // keyed hash + md = computeMAC(msg[:msgLength], hashSize, key[:hashSize]) + h.Write(md) + } + } + + sum := h.Sum(nil) + expected := [32]byte{ + 0xc2, 0x3a, 0x78, 0x00, 0xd9, 0x81, 0x23, 0xbd, + 0x10, 0xf5, 0x06, 0xc6, 0x1e, 0x29, 0xda, 0x56, + 0x03, 0xd7, 0x63, 0xb8, 0xbb, 0xad, 0x2e, 0x73, + 0x7f, 0x5e, 0x76, 0x5a, 0x7b, 0xcc, 0xd4, 0x75, + } + if !bytes.Equal(sum, expected[:]) { + t.Fatalf("got %x, wanted %x", sum, expected) + } +} + +// Benchmarks + +func benchmarkSum(b *testing.B, size int) { + data := make([]byte, size) + b.SetBytes(int64(size)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + Sum512(data) + } +} + +func benchmarkWrite(b *testing.B, size int) { + data := make([]byte, size) + h, _ := New512(nil) + b.SetBytes(int64(size)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + h.Write(data) + } +} + +func BenchmarkWrite128(b *testing.B) { benchmarkWrite(b, 128) } +func BenchmarkWrite1K(b *testing.B) { benchmarkWrite(b, 1024) } + +func BenchmarkSum128(b *testing.B) { benchmarkSum(b, 128) } +func BenchmarkSum1K(b *testing.B) { benchmarkSum(b, 1024) } + +// These values were taken from https://blake2.net/blake2b-test.txt. +var hashes = []string{ + "10ebb67700b1868efb4417987acf4690ae9d972fb7a590c2f02871799aaa4786b5e996e8f0f4eb981fc214b005f42d2ff4233499391653df7aefcbc13fc51568", + "961f6dd1e4dd30f63901690c512e78e4b45e4742ed197c3c5e45c549fd25f2e4187b0bc9fe30492b16b0d0bc4ef9b0f34c7003fac09a5ef1532e69430234cebd", + "da2cfbe2d8409a0f38026113884f84b50156371ae304c4430173d08a99d9fb1b983164a3770706d537f49e0c916d9f32b95cc37a95b99d857436f0232c88a965", + "33d0825dddf7ada99b0e7e307104ad07ca9cfd9692214f1561356315e784f3e5a17e364ae9dbb14cb2036df932b77f4b292761365fb328de7afdc6d8998f5fc1", + "beaa5a3d08f3807143cf621d95cd690514d0b49efff9c91d24b59241ec0eefa5f60196d407048bba8d2146828ebcb0488d8842fd56bb4f6df8e19c4b4daab8ac", + "098084b51fd13deae5f4320de94a688ee07baea2800486689a8636117b46c1f4c1f6af7f74ae7c857600456a58a3af251dc4723a64cc7c0a5ab6d9cac91c20bb", + "6044540d560853eb1c57df0077dd381094781cdb9073e5b1b3d3f6c7829e12066bbaca96d989a690de72ca3133a83652ba284a6d62942b271ffa2620c9e75b1f", + "7a8cfe9b90f75f7ecb3acc053aaed6193112b6f6a4aeeb3f65d3de541942deb9e2228152a3c4bbbe72fc3b12629528cfbb09fe630f0474339f54abf453e2ed52", + "380beaf6ea7cc9365e270ef0e6f3a64fb902acae51dd5512f84259ad2c91f4bc4108db73192a5bbfb0cbcf71e46c3e21aee1c5e860dc96e8eb0b7b8426e6abe9", + "60fe3c4535e1b59d9a61ea8500bfac41a69dffb1ceadd9aca323e9a625b64da5763bad7226da02b9c8c4f1a5de140ac5a6c1124e4f718ce0b28ea47393aa6637", + "4fe181f54ad63a2983feaaf77d1e7235c2beb17fa328b6d9505bda327df19fc37f02c4b6f0368ce23147313a8e5738b5fa2a95b29de1c7f8264eb77b69f585cd", + "f228773ce3f3a42b5f144d63237a72d99693adb8837d0e112a8a0f8ffff2c362857ac49c11ec740d1500749dac9b1f4548108bf3155794dcc9e4082849e2b85b", + "962452a8455cc56c8511317e3b1f3b2c37df75f588e94325fdd77070359cf63a9ae6e930936fdf8e1e08ffca440cfb72c28f06d89a2151d1c46cd5b268ef8563", + "43d44bfa18768c59896bf7ed1765cb2d14af8c260266039099b25a603e4ddc5039d6ef3a91847d1088d401c0c7e847781a8a590d33a3c6cb4df0fab1c2f22355", + "dcffa9d58c2a4ca2cdbb0c7aa4c4c1d45165190089f4e983bb1c2cab4aaeff1fa2b5ee516fecd780540240bf37e56c8bcca7fab980e1e61c9400d8a9a5b14ac6", + "6fbf31b45ab0c0b8dad1c0f5f4061379912dde5aa922099a030b725c73346c524291adef89d2f6fd8dfcda6d07dad811a9314536c2915ed45da34947e83de34e", + "a0c65bddde8adef57282b04b11e7bc8aab105b99231b750c021f4a735cb1bcfab87553bba3abb0c3e64a0b6955285185a0bd35fb8cfde557329bebb1f629ee93", + "f99d815550558e81eca2f96718aed10d86f3f1cfb675cce06b0eff02f617c5a42c5aa760270f2679da2677c5aeb94f1142277f21c7f79f3c4f0cce4ed8ee62b1", + "95391da8fc7b917a2044b3d6f5374e1ca072b41454d572c7356c05fd4bc1e0f40b8bb8b4a9f6bce9be2c4623c399b0dca0dab05cb7281b71a21b0ebcd9e55670", + "04b9cd3d20d221c09ac86913d3dc63041989a9a1e694f1e639a3ba7e451840f750c2fc191d56ad61f2e7936bc0ac8e094b60caeed878c18799045402d61ceaf9", + "ec0e0ef707e4ed6c0c66f9e089e4954b058030d2dd86398fe84059631f9ee591d9d77375355149178c0cf8f8e7c49ed2a5e4f95488a2247067c208510fadc44c", + "9a37cce273b79c09913677510eaf7688e89b3314d3532fd2764c39de022a2945b5710d13517af8ddc0316624e73bec1ce67df15228302036f330ab0cb4d218dd", + "4cf9bb8fb3d4de8b38b2f262d3c40f46dfe747e8fc0a414c193d9fcf753106ce47a18f172f12e8a2f1c26726545358e5ee28c9e2213a8787aafbc516d2343152", + "64e0c63af9c808fd893137129867fd91939d53f2af04be4fa268006100069b2d69daa5c5d8ed7fddcb2a70eeecdf2b105dd46a1e3b7311728f639ab489326bc9", + "5e9c93158d659b2def06b0c3c7565045542662d6eee8a96a89b78ade09fe8b3dcc096d4fe48815d88d8f82620156602af541955e1f6ca30dce14e254c326b88f", + "7775dff889458dd11aef417276853e21335eb88e4dec9cfb4e9edb49820088551a2ca60339f12066101169f0dfe84b098fddb148d9da6b3d613df263889ad64b", + "f0d2805afbb91f743951351a6d024f9353a23c7ce1fc2b051b3a8b968c233f46f50f806ecb1568ffaa0b60661e334b21dde04f8fa155ac740eeb42e20b60d764", + "86a2af316e7d7754201b942e275364ac12ea8962ab5bd8d7fb276dc5fbffc8f9a28cae4e4867df6780d9b72524160927c855da5b6078e0b554aa91e31cb9ca1d", + "10bdf0caa0802705e706369baf8a3f79d72c0a03a80675a7bbb00be3a45e516424d1ee88efb56f6d5777545ae6e27765c3a8f5e493fc308915638933a1dfee55", + "b01781092b1748459e2e4ec178696627bf4ebafebba774ecf018b79a68aeb84917bf0b84bb79d17b743151144cd66b7b33a4b9e52c76c4e112050ff5385b7f0b", + "c6dbc61dec6eaeac81e3d5f755203c8e220551534a0b2fd105a91889945a638550204f44093dd998c076205dffad703a0e5cd3c7f438a7e634cd59fededb539e", + "eba51acffb4cea31db4b8d87e9bf7dd48fe97b0253ae67aa580f9ac4a9d941f2bea518ee286818cc9f633f2a3b9fb68e594b48cdd6d515bf1d52ba6c85a203a7", + "86221f3ada52037b72224f105d7999231c5e5534d03da9d9c0a12acb68460cd375daf8e24386286f9668f72326dbf99ba094392437d398e95bb8161d717f8991", + "5595e05c13a7ec4dc8f41fb70cb50a71bce17c024ff6de7af618d0cc4e9c32d9570d6d3ea45b86525491030c0d8f2b1836d5778c1ce735c17707df364d054347", + "ce0f4f6aca89590a37fe034dd74dd5fa65eb1cbd0a41508aaddc09351a3cea6d18cb2189c54b700c009f4cbf0521c7ea01be61c5ae09cb54f27bc1b44d658c82", + "7ee80b06a215a3bca970c77cda8761822bc103d44fa4b33f4d07dcb997e36d55298bceae12241b3fa07fa63be5576068da387b8d5859aeab701369848b176d42", + "940a84b6a84d109aab208c024c6ce9647676ba0aaa11f86dbb7018f9fd2220a6d901a9027f9abcf935372727cbf09ebd61a2a2eeb87653e8ecad1bab85dc8327", + "2020b78264a82d9f4151141adba8d44bf20c5ec062eee9b595a11f9e84901bf148f298e0c9f8777dcdbc7cc4670aac356cc2ad8ccb1629f16f6a76bcefbee760", + "d1b897b0e075ba68ab572adf9d9c436663e43eb3d8e62d92fc49c9be214e6f27873fe215a65170e6bea902408a25b49506f47babd07cecf7113ec10c5dd31252", + "b14d0c62abfa469a357177e594c10c194243ed2025ab8aa5ad2fa41ad318e0ff48cd5e60bec07b13634a711d2326e488a985f31e31153399e73088efc86a5c55", + "4169c5cc808d2697dc2a82430dc23e3cd356dc70a94566810502b8d655b39abf9e7f902fe717e0389219859e1945df1af6ada42e4ccda55a197b7100a30c30a1", + "258a4edb113d66c839c8b1c91f15f35ade609f11cd7f8681a4045b9fef7b0b24c82cda06a5f2067b368825e3914e53d6948ede92efd6e8387fa2e537239b5bee", + "79d2d8696d30f30fb34657761171a11e6c3f1e64cbe7bebee159cb95bfaf812b4f411e2f26d9c421dc2c284a3342d823ec293849e42d1e46b0a4ac1e3c86abaa", + "8b9436010dc5dee992ae38aea97f2cd63b946d94fedd2ec9671dcde3bd4ce9564d555c66c15bb2b900df72edb6b891ebcadfeff63c9ea4036a998be7973981e7", + "c8f68e696ed28242bf997f5b3b34959508e42d613810f1e2a435c96ed2ff560c7022f361a9234b9837feee90bf47922ee0fd5f8ddf823718d86d1e16c6090071", + "b02d3eee4860d5868b2c39ce39bfe81011290564dd678c85e8783f29302dfc1399ba95b6b53cd9ebbf400cca1db0ab67e19a325f2d115812d25d00978ad1bca4", + "7693ea73af3ac4dad21ca0d8da85b3118a7d1c6024cfaf557699868217bc0c2f44a199bc6c0edd519798ba05bd5b1b4484346a47c2cadf6bf30b785cc88b2baf", + "a0e5c1c0031c02e48b7f09a5e896ee9aef2f17fc9e18e997d7f6cac7ae316422c2b1e77984e5f3a73cb45deed5d3f84600105e6ee38f2d090c7d0442ea34c46d", + "41daa6adcfdb69f1440c37b596440165c15ada596813e2e22f060fcd551f24dee8e04ba6890387886ceec4a7a0d7fc6b44506392ec3822c0d8c1acfc7d5aebe8", + "14d4d40d5984d84c5cf7523b7798b254e275a3a8cc0a1bd06ebc0bee726856acc3cbf516ff667cda2058ad5c3412254460a82c92187041363cc77a4dc215e487", + "d0e7a1e2b9a447fee83e2277e9ff8010c2f375ae12fa7aaa8ca5a6317868a26a367a0b69fbc1cf32a55d34eb370663016f3d2110230eba754028a56f54acf57c", + "e771aa8db5a3e043e8178f39a0857ba04a3f18e4aa05743cf8d222b0b095825350ba422f63382a23d92e4149074e816a36c1cd28284d146267940b31f8818ea2", + "feb4fd6f9e87a56bef398b3284d2bda5b5b0e166583a66b61e538457ff0584872c21a32962b9928ffab58de4af2edd4e15d8b35570523207ff4e2a5aa7754caa", + "462f17bf005fb1c1b9e671779f665209ec2873e3e411f98dabf240a1d5ec3f95ce6796b6fc23fe171903b502023467dec7273ff74879b92967a2a43a5a183d33", + "d3338193b64553dbd38d144bea71c5915bb110e2d88180dbc5db364fd6171df317fc7268831b5aef75e4342b2fad8797ba39eddcef80e6ec08159350b1ad696d", + "e1590d585a3d39f7cb599abd479070966409a6846d4377acf4471d065d5db94129cc9be92573b05ed226be1e9b7cb0cabe87918589f80dadd4ef5ef25a93d28e", + "f8f3726ac5a26cc80132493a6fedcb0e60760c09cfc84cad178175986819665e76842d7b9fedf76dddebf5d3f56faaad4477587af21606d396ae570d8e719af2", + "30186055c07949948183c850e9a756cc09937e247d9d928e869e20bafc3cd9721719d34e04a0899b92c736084550186886efba2e790d8be6ebf040b209c439a4", + "f3c4276cb863637712c241c444c5cc1e3554e0fddb174d035819dd83eb700b4ce88df3ab3841ba02085e1a99b4e17310c5341075c0458ba376c95a6818fbb3e2", + "0aa007c4dd9d5832393040a1583c930bca7dc5e77ea53add7e2b3f7c8e231368043520d4a3ef53c969b6bbfd025946f632bd7f765d53c21003b8f983f75e2a6a", + "08e9464720533b23a04ec24f7ae8c103145f765387d738777d3d343477fd1c58db052142cab754ea674378e18766c53542f71970171cc4f81694246b717d7564", + "d37ff7ad297993e7ec21e0f1b4b5ae719cdc83c5db687527f27516cbffa822888a6810ee5c1ca7bfe3321119be1ab7bfa0a502671c8329494df7ad6f522d440f", + "dd9042f6e464dcf86b1262f6accfafbd8cfd902ed3ed89abf78ffa482dbdeeb6969842394c9a1168ae3d481a017842f660002d42447c6b22f7b72f21aae021c9", + "bd965bf31e87d70327536f2a341cebc4768eca275fa05ef98f7f1b71a0351298de006fba73fe6733ed01d75801b4a928e54231b38e38c562b2e33ea1284992fa", + "65676d800617972fbd87e4b9514e1c67402b7a331096d3bfac22f1abb95374abc942f16e9ab0ead33b87c91968a6e509e119ff07787b3ef483e1dcdccf6e3022", + "939fa189699c5d2c81ddd1ffc1fa207c970b6a3685bb29ce1d3e99d42f2f7442da53e95a72907314f4588399a3ff5b0a92beb3f6be2694f9f86ecf2952d5b41c", + "c516541701863f91005f314108ceece3c643e04fc8c42fd2ff556220e616aaa6a48aeb97a84bad74782e8dff96a1a2fa949339d722edcaa32b57067041df88cc", + "987fd6e0d6857c553eaebb3d34970a2c2f6e89a3548f492521722b80a1c21a153892346d2cba6444212d56da9a26e324dccbc0dcde85d4d2ee4399eec5a64e8f", + "ae56deb1c2328d9c4017706bce6e99d41349053ba9d336d677c4c27d9fd50ae6aee17e853154e1f4fe7672346da2eaa31eea53fcf24a22804f11d03da6abfc2b", + "49d6a608c9bde4491870498572ac31aac3fa40938b38a7818f72383eb040ad39532bc06571e13d767e6945ab77c0bdc3b0284253343f9f6c1244ebf2ff0df866", + "da582ad8c5370b4469af862aa6467a2293b2b28bd80ae0e91f425ad3d47249fdf98825cc86f14028c3308c9804c78bfeeeee461444ce243687e1a50522456a1d", + "d5266aa3331194aef852eed86d7b5b2633a0af1c735906f2e13279f14931a9fc3b0eac5ce9245273bd1aa92905abe16278ef7efd47694789a7283b77da3c70f8", + "2962734c28252186a9a1111c732ad4de4506d4b4480916303eb7991d659ccda07a9911914bc75c418ab7a4541757ad054796e26797feaf36e9f6ad43f14b35a4", + "e8b79ec5d06e111bdfafd71e9f5760f00ac8ac5d8bf768f9ff6f08b8f026096b1cc3a4c973333019f1e3553e77da3f98cb9f542e0a90e5f8a940cc58e59844b3", + "dfb320c44f9d41d1efdcc015f08dd5539e526e39c87d509ae6812a969e5431bf4fa7d91ffd03b981e0d544cf72d7b1c0374f8801482e6dea2ef903877eba675e", + "d88675118fdb55a5fb365ac2af1d217bf526ce1ee9c94b2f0090b2c58a06ca58187d7fe57c7bed9d26fca067b4110eefcd9a0a345de872abe20de368001b0745", + "b893f2fc41f7b0dd6e2f6aa2e0370c0cff7df09e3acfcc0e920b6e6fad0ef747c40668417d342b80d2351e8c175f20897a062e9765e6c67b539b6ba8b9170545", + "6c67ec5697accd235c59b486d7b70baeedcbd4aa64ebd4eef3c7eac189561a726250aec4d48cadcafbbe2ce3c16ce2d691a8cce06e8879556d4483ed7165c063", + "f1aa2b044f8f0c638a3f362e677b5d891d6fd2ab0765f6ee1e4987de057ead357883d9b405b9d609eea1b869d97fb16d9b51017c553f3b93c0a1e0f1296fedcd", + "cbaa259572d4aebfc1917acddc582b9f8dfaa928a198ca7acd0f2aa76a134a90252e6298a65b08186a350d5b7626699f8cb721a3ea5921b753ae3a2dce24ba3a", + "fa1549c9796cd4d303dcf452c1fbd5744fd9b9b47003d920b92de34839d07ef2a29ded68f6fc9e6c45e071a2e48bd50c5084e96b657dd0404045a1ddefe282ed", + "5cf2ac897ab444dcb5c8d87c495dbdb34e1838b6b629427caa51702ad0f9688525f13bec503a3c3a2c80a65e0b5715e8afab00ffa56ec455a49a1ad30aa24fcd", + "9aaf80207bace17bb7ab145757d5696bde32406ef22b44292ef65d4519c3bb2ad41a59b62cc3e94b6fa96d32a7faadae28af7d35097219aa3fd8cda31e40c275", + "af88b163402c86745cb650c2988fb95211b94b03ef290eed9662034241fd51cf398f8073e369354c43eae1052f9b63b08191caa138aa54fea889cc7024236897", + "48fa7d64e1ceee27b9864db5ada4b53d00c9bc7626555813d3cd6730ab3cc06ff342d727905e33171bde6e8476e77fb1720861e94b73a2c538d254746285f430", + "0e6fd97a85e904f87bfe85bbeb34f69e1f18105cf4ed4f87aec36c6e8b5f68bd2a6f3dc8a9ecb2b61db4eedb6b2ea10bf9cb0251fb0f8b344abf7f366b6de5ab", + "06622da5787176287fdc8fed440bad187d830099c94e6d04c8e9c954cda70c8bb9e1fc4a6d0baa831b9b78ef6648681a4867a11da93ee36e5e6a37d87fc63f6f", + "1da6772b58fabf9c61f68d412c82f182c0236d7d575ef0b58dd22458d643cd1dfc93b03871c316d8430d312995d4197f0874c99172ba004a01ee295abac24e46", + "3cd2d9320b7b1d5fb9aab951a76023fa667be14a9124e394513918a3f44096ae4904ba0ffc150b63bc7ab1eeb9a6e257e5c8f000a70394a5afd842715de15f29", + "04cdc14f7434e0b4be70cb41db4c779a88eaef6accebcb41f2d42fffe7f32a8e281b5c103a27021d0d08362250753cdf70292195a53a48728ceb5844c2d98bab", + "9071b7a8a075d0095b8fb3ae5113785735ab98e2b52faf91d5b89e44aac5b5d4ebbf91223b0ff4c71905da55342e64655d6ef8c89a4768c3f93a6dc0366b5bc8", + "ebb30240dd96c7bc8d0abe49aa4edcbb4afdc51ff9aaf720d3f9e7fbb0f9c6d6571350501769fc4ebd0b2141247ff400d4fd4be414edf37757bb90a32ac5c65a", + "8532c58bf3c8015d9d1cbe00eef1f5082f8f3632fbe9f1ed4f9dfb1fa79e8283066d77c44c4af943d76b300364aecbd0648c8a8939bd204123f4b56260422dec", + "fe9846d64f7c7708696f840e2d76cb4408b6595c2f81ec6a28a7f2f20cb88cfe6ac0b9e9b8244f08bd7095c350c1d0842f64fb01bb7f532dfcd47371b0aeeb79", + "28f17ea6fb6c42092dc264257e29746321fb5bdaea9873c2a7fa9d8f53818e899e161bc77dfe8090afd82bf2266c5c1bc930a8d1547624439e662ef695f26f24", + "ec6b7d7f030d4850acae3cb615c21dd25206d63e84d1db8d957370737ba0e98467ea0ce274c66199901eaec18a08525715f53bfdb0aacb613d342ebdceeddc3b", + "b403d3691c03b0d3418df327d5860d34bbfcc4519bfbce36bf33b208385fadb9186bc78a76c489d89fd57e7dc75412d23bcd1dae8470ce9274754bb8585b13c5", + "31fc79738b8772b3f55cd8178813b3b52d0db5a419d30ba9495c4b9da0219fac6df8e7c23a811551a62b827f256ecdb8124ac8a6792ccfecc3b3012722e94463", + "bb2039ec287091bcc9642fc90049e73732e02e577e2862b32216ae9bedcd730c4c284ef3968c368b7d37584f97bd4b4dc6ef6127acfe2e6ae2509124e66c8af4", + "f53d68d13f45edfcb9bd415e2831e938350d5380d3432278fc1c0c381fcb7c65c82dafe051d8c8b0d44e0974a0e59ec7bf7ed0459f86e96f329fc79752510fd3", + "8d568c7984f0ecdf7640fbc483b5d8c9f86634f6f43291841b309a350ab9c1137d24066b09da9944bac54d5bb6580d836047aac74ab724b887ebf93d4b32eca9", + "c0b65ce5a96ff774c456cac3b5f2c4cd359b4ff53ef93a3da0778be4900d1e8da1601e769e8f1b02d2a2f8c5b9fa10b44f1c186985468feeb008730283a6657d", + "4900bba6f5fb103ece8ec96ada13a5c3c85488e05551da6b6b33d988e611ec0fe2e3c2aa48ea6ae8986a3a231b223c5d27cec2eadde91ce07981ee652862d1e4", + "c7f5c37c7285f927f76443414d4357ff789647d7a005a5a787e03c346b57f49f21b64fa9cf4b7e45573e23049017567121a9c3d4b2b73ec5e9413577525db45a", + "ec7096330736fdb2d64b5653e7475da746c23a4613a82687a28062d3236364284ac01720ffb406cfe265c0df626a188c9e5963ace5d3d5bb363e32c38c2190a6", + "82e744c75f4649ec52b80771a77d475a3bc091989556960e276a5f9ead92a03f718742cdcfeaee5cb85c44af198adc43a4a428f5f0c2ddb0be36059f06d7df73", + "2834b7a7170f1f5b68559ab78c1050ec21c919740b784a9072f6e5d69f828d70c919c5039fb148e39e2c8a52118378b064ca8d5001cd10a5478387b966715ed6", + "16b4ada883f72f853bb7ef253efcab0c3e2161687ad61543a0d2824f91c1f81347d86be709b16996e17f2dd486927b0288ad38d13063c4a9672c39397d3789b6", + "78d048f3a69d8b54ae0ed63a573ae350d89f7c6cf1f3688930de899afa037697629b314e5cd303aa62feea72a25bf42b304b6c6bcb27fae21c16d925e1fbdac3", + "0f746a48749287ada77a82961f05a4da4abdb7d77b1220f836d09ec814359c0ec0239b8c7b9ff9e02f569d1b301ef67c4612d1de4f730f81c12c40cc063c5caa", + "f0fc859d3bd195fbdc2d591e4cdac15179ec0f1dc821c11df1f0c1d26e6260aaa65b79fafacafd7d3ad61e600f250905f5878c87452897647a35b995bcadc3a3", + "2620f687e8625f6a412460b42e2cef67634208ce10a0cbd4dff7044a41b7880077e9f8dc3b8d1216d3376a21e015b58fb279b521d83f9388c7382c8505590b9b", + "227e3aed8d2cb10b918fcb04f9de3e6d0a57e08476d93759cd7b2ed54a1cbf0239c528fb04bbf288253e601d3bc38b21794afef90b17094a182cac557745e75f", + "1a929901b09c25f27d6b35be7b2f1c4745131fdebca7f3e2451926720434e0db6e74fd693ad29b777dc3355c592a361c4873b01133a57c2e3b7075cbdb86f4fc", + "5fd7968bc2fe34f220b5e3dc5af9571742d73b7d60819f2888b629072b96a9d8ab2d91b82d0a9aaba61bbd39958132fcc4257023d1eca591b3054e2dc81c8200", + "dfcce8cf32870cc6a503eadafc87fd6f78918b9b4d0737db6810be996b5497e7e5cc80e312f61e71ff3e9624436073156403f735f56b0b01845c18f6caf772e6", + "02f7ef3a9ce0fff960f67032b296efca3061f4934d690749f2d01c35c81c14f39a67fa350bc8a0359bf1724bffc3bca6d7c7bba4791fd522a3ad353c02ec5aa8", + "64be5c6aba65d594844ae78bb022e5bebe127fd6b6ffa5a13703855ab63b624dcd1a363f99203f632ec386f3ea767fc992e8ed9686586aa27555a8599d5b808f", + "f78585505c4eaa54a8b5be70a61e735e0ff97af944ddb3001e35d86c4e2199d976104b6ae31750a36a726ed285064f5981b503889fef822fcdc2898dddb7889a", + "e4b5566033869572edfd87479a5bb73c80e8759b91232879d96b1dda36c012076ee5a2ed7ae2de63ef8406a06aea82c188031b560beafb583fb3de9e57952a7e", + "e1b3e7ed867f6c9484a2a97f7715f25e25294e992e41f6a7c161ffc2adc6daaeb7113102d5e6090287fe6ad94ce5d6b739c6ca240b05c76fb73f25dd024bf935", + "85fd085fdc12a080983df07bd7012b0d402a0f4043fcb2775adf0bad174f9b08d1676e476985785c0a5dcc41dbff6d95ef4d66a3fbdc4a74b82ba52da0512b74", + "aed8fa764b0fbff821e05233d2f7b0900ec44d826f95e93c343c1bc3ba5a24374b1d616e7e7aba453a0ada5e4fab5382409e0d42ce9c2bc7fb39a99c340c20f0", + "7ba3b2e297233522eeb343bd3ebcfd835a04007735e87f0ca300cbee6d416565162171581e4020ff4cf176450f1291ea2285cb9ebffe4c56660627685145051c", + "de748bcf89ec88084721e16b85f30adb1a6134d664b5843569babc5bbd1a15ca9b61803c901a4fef32965a1749c9f3a4e243e173939dc5a8dc495c671ab52145", + "aaf4d2bdf200a919706d9842dce16c98140d34bc433df320aba9bd429e549aa7a3397652a4d768277786cf993cde2338673ed2e6b66c961fefb82cd20c93338f", + "c408218968b788bf864f0997e6bc4c3dba68b276e2125a4843296052ff93bf5767b8cdce7131f0876430c1165fec6c4f47adaa4fd8bcfacef463b5d3d0fa61a0", + "76d2d819c92bce55fa8e092ab1bf9b9eab237a25267986cacf2b8ee14d214d730dc9a5aa2d7b596e86a1fd8fa0804c77402d2fcd45083688b218b1cdfa0dcbcb", + "72065ee4dd91c2d8509fa1fc28a37c7fc9fa7d5b3f8ad3d0d7a25626b57b1b44788d4caf806290425f9890a3a2a35a905ab4b37acfd0da6e4517b2525c9651e4", + "64475dfe7600d7171bea0b394e27c9b00d8e74dd1e416a79473682ad3dfdbb706631558055cfc8a40e07bd015a4540dcdea15883cbbf31412df1de1cd4152b91", + "12cd1674a4488a5d7c2b3160d2e2c4b58371bedad793418d6f19c6ee385d70b3e06739369d4df910edb0b0a54cbff43d54544cd37ab3a06cfa0a3ddac8b66c89", + "60756966479dedc6dd4bcff8ea7d1d4ce4d4af2e7b097e32e3763518441147cc12b3c0ee6d2ecabf1198cec92e86a3616fba4f4e872f5825330adbb4c1dee444", + "a7803bcb71bc1d0f4383dde1e0612e04f872b715ad30815c2249cf34abb8b024915cb2fc9f4e7cc4c8cfd45be2d5a91eab0941c7d270e2da4ca4a9f7ac68663a", + "b84ef6a7229a34a750d9a98ee2529871816b87fbe3bc45b45fa5ae82d5141540211165c3c5d7a7476ba5a4aa06d66476f0d9dc49a3f1ee72c3acabd498967414", + "fae4b6d8efc3f8c8e64d001dabec3a21f544e82714745251b2b4b393f2f43e0da3d403c64db95a2cb6e23ebb7b9e94cdd5ddac54f07c4a61bd3cb10aa6f93b49", + "34f7286605a122369540141ded79b8957255da2d4155abbf5a8dbb89c8eb7ede8eeef1daa46dc29d751d045dc3b1d658bb64b80ff8589eddb3824b13da235a6b", + "3b3b48434be27b9eababba43bf6b35f14b30f6a88dc2e750c358470d6b3aa3c18e47db4017fa55106d8252f016371a00f5f8b070b74ba5f23cffc5511c9f09f0", + "ba289ebd6562c48c3e10a8ad6ce02e73433d1e93d7c9279d4d60a7e879ee11f441a000f48ed9f7c4ed87a45136d7dccdca482109c78a51062b3ba4044ada2469", + "022939e2386c5a37049856c850a2bb10a13dfea4212b4c732a8840a9ffa5faf54875c5448816b2785a007da8a8d2bc7d71a54e4e6571f10b600cbdb25d13ede3", + "e6fec19d89ce8717b1a087024670fe026f6c7cbda11caef959bb2d351bf856f8055d1c0ebdaaa9d1b17886fc2c562b5e99642fc064710c0d3488a02b5ed7f6fd", + "94c96f02a8f576aca32ba61c2b206f907285d9299b83ac175c209a8d43d53bfe683dd1d83e7549cb906c28f59ab7c46f8751366a28c39dd5fe2693c9019666c8", + "31a0cd215ebd2cb61de5b9edc91e6195e31c59a5648d5c9f737e125b2605708f2e325ab3381c8dce1a3e958886f1ecdc60318f882cfe20a24191352e617b0f21", + "91ab504a522dce78779f4c6c6ba2e6b6db5565c76d3e7e7c920caf7f757ef9db7c8fcf10e57f03379ea9bf75eb59895d96e149800b6aae01db778bb90afbc989", + "d85cabc6bd5b1a01a5afd8c6734740da9fd1c1acc6db29bfc8a2e5b668b028b6b3154bfb8703fa3180251d589ad38040ceb707c4bad1b5343cb426b61eaa49c1", + "d62efbec2ca9c1f8bd66ce8b3f6a898cb3f7566ba6568c618ad1feb2b65b76c3ce1dd20f7395372faf28427f61c9278049cf0140df434f5633048c86b81e0399", + "7c8fdc6175439e2c3db15bafa7fb06143a6a23bc90f449e79deef73c3d492a671715c193b6fea9f036050b946069856b897e08c00768f5ee5ddcf70b7cd6d0e0", + "58602ee7468e6bc9df21bd51b23c005f72d6cb013f0a1b48cbec5eca299299f97f09f54a9a01483eaeb315a6478bad37ba47ca1347c7c8fc9e6695592c91d723", + "27f5b79ed256b050993d793496edf4807c1d85a7b0a67c9c4fa99860750b0ae66989670a8ffd7856d7ce411599e58c4d77b232a62bef64d15275be46a68235ff", + "3957a976b9f1887bf004a8dca942c92d2b37ea52600f25e0c9bc5707d0279c00c6e85a839b0d2d8eb59c51d94788ebe62474a791cadf52cccf20f5070b6573fc", + "eaa2376d55380bf772ecca9cb0aa4668c95c707162fa86d518c8ce0ca9bf7362b9f2a0adc3ff59922df921b94567e81e452f6c1a07fc817cebe99604b3505d38", + "c1e2c78b6b2734e2480ec550434cb5d613111adcc21d475545c3b1b7e6ff12444476e5c055132e2229dc0f807044bb919b1a5662dd38a9ee65e243a3911aed1a", + "8ab48713389dd0fcf9f965d3ce66b1e559a1f8c58741d67683cd971354f452e62d0207a65e436c5d5d8f8ee71c6abfe50e669004c302b31a7ea8311d4a916051", + "24ce0addaa4c65038bd1b1c0f1452a0b128777aabc94a29df2fd6c7e2f85f8ab9ac7eff516b0e0a825c84a24cfe492eaad0a6308e46dd42fe8333ab971bb30ca", + "5154f929ee03045b6b0c0004fa778edee1d139893267cc84825ad7b36c63de32798e4a166d24686561354f63b00709a1364b3c241de3febf0754045897467cd4", + "e74e907920fd87bd5ad636dd11085e50ee70459c443e1ce5809af2bc2eba39f9e6d7128e0e3712c316da06f4705d78a4838e28121d4344a2c79c5e0db307a677", + "bf91a22334bac20f3fd80663b3cd06c4e8802f30e6b59f90d3035cc9798a217ed5a31abbda7fa6842827bdf2a7a1c21f6fcfccbb54c6c52926f32da816269be1", + "d9d5c74be5121b0bd742f26bffb8c89f89171f3f934913492b0903c271bbe2b3395ef259669bef43b57f7fcc3027db01823f6baee66e4f9fead4d6726c741fce", + "50c8b8cf34cd879f80e2faab3230b0c0e1cc3e9dcadeb1b9d97ab923415dd9a1fe38addd5c11756c67990b256e95ad6d8f9fedce10bf1c90679cde0ecf1be347", + "0a386e7cd5dd9b77a035e09fe6fee2c8ce61b5383c87ea43205059c5e4cd4f4408319bb0a82360f6a58e6c9ce3f487c446063bf813bc6ba535e17fc1826cfc91", + "1f1459cb6b61cbac5f0efe8fc487538f42548987fcd56221cfa7beb22504769e792c45adfb1d6b3d60d7b749c8a75b0bdf14e8ea721b95dca538ca6e25711209", + "e58b3836b7d8fedbb50ca5725c6571e74c0785e97821dab8b6298c10e4c079d4a6cdf22f0fedb55032925c16748115f01a105e77e00cee3d07924dc0d8f90659", + "b929cc6505f020158672deda56d0db081a2ee34c00c1100029bdf8ea98034fa4bf3e8655ec697fe36f40553c5bb46801644a627d3342f4fc92b61f03290fb381", + "72d353994b49d3e03153929a1e4d4f188ee58ab9e72ee8e512f29bc773913819ce057ddd7002c0433ee0a16114e3d156dd2c4a7e80ee53378b8670f23e33ef56", + "c70ef9bfd775d408176737a0736d68517ce1aaad7e81a93c8c1ed967ea214f56c8a377b1763e676615b60f3988241eae6eab9685a5124929d28188f29eab06f7", + "c230f0802679cb33822ef8b3b21bf7a9a28942092901d7dac3760300831026cf354c9232df3e084d9903130c601f63c1f4a4a4b8106e468cd443bbe5a734f45f", + "6f43094cafb5ebf1f7a4937ec50f56a4c9da303cbb55ac1f27f1f1976cd96beda9464f0e7b9c54620b8a9fba983164b8be3578425a024f5fe199c36356b88972", + "3745273f4c38225db2337381871a0c6aafd3af9b018c88aa02025850a5dc3a42a1a3e03e56cbf1b0876d63a441f1d2856a39b8801eb5af325201c415d65e97fe", + "c50c44cca3ec3edaae779a7e179450ebdda2f97067c690aa6c5a4ac7c30139bb27c0df4db3220e63cb110d64f37ffe078db72653e2daacf93ae3f0a2d1a7eb2e", + "8aef263e385cbc61e19b28914243262af5afe8726af3ce39a79c27028cf3ecd3f8d2dfd9cfc9ad91b58f6f20778fd5f02894a3d91c7d57d1e4b866a7f364b6be", + "28696141de6e2d9bcb3235578a66166c1448d3e905a1b482d423be4bc5369bc8c74dae0acc9cc123e1d8ddce9f97917e8c019c552da32d39d2219b9abf0fa8c8", + "2fb9eb2085830181903a9dafe3db428ee15be7662224efd643371fb25646aee716e531eca69b2bdc8233f1a8081fa43da1500302975a77f42fa592136710e9dc", + "66f9a7143f7a3314a669bf2e24bbb35014261d639f495b6c9c1f104fe8e320aca60d4550d69d52edbd5a3cdeb4014ae65b1d87aa770b69ae5c15f4330b0b0ad8", + "f4c4dd1d594c3565e3e25ca43dad82f62abea4835ed4cd811bcd975e46279828d44d4c62c3679f1b7f7b9dd4571d7b49557347b8c5460cbdc1bef690fb2a08c0", + "8f1dc9649c3a84551f8f6e91cac68242a43b1f8f328ee92280257387fa7559aa6db12e4aeadc2d26099178749c6864b357f3f83b2fb3efa8d2a8db056bed6bcc", + "3139c1a7f97afd1675d460ebbc07f2728aa150df849624511ee04b743ba0a833092f18c12dc91b4dd243f333402f59fe28abdbbbae301e7b659c7a26d5c0f979", + "06f94a2996158a819fe34c40de3cf0379fd9fb85b3e363ba3926a0e7d960e3f4c2e0c70c7ce0ccb2a64fc29869f6e7ab12bd4d3f14fce943279027e785fb5c29", + "c29c399ef3eee8961e87565c1ce263925fc3d0ce267d13e48dd9e732ee67b0f69fad56401b0f10fcaac119201046cca28c5b14abdea3212ae65562f7f138db3d", + "4cec4c9df52eef05c3f6faaa9791bc7445937183224ecc37a1e58d0132d35617531d7e795f52af7b1eb9d147de1292d345fe341823f8e6bc1e5badca5c656108", + "898bfbae93b3e18d00697eab7d9704fa36ec339d076131cefdf30edbe8d9cc81c3a80b129659b163a323bab9793d4feed92d54dae966c77529764a09be88db45", + "ee9bd0469d3aaf4f14035be48a2c3b84d9b4b1fff1d945e1f1c1d38980a951be197b25fe22c731f20aeacc930ba9c4a1f4762227617ad350fdabb4e80273a0f4", + "3d4d3113300581cd96acbf091c3d0f3c310138cd6979e6026cde623e2dd1b24d4a8638bed1073344783ad0649cc6305ccec04beb49f31c633088a99b65130267", + "95c0591ad91f921ac7be6d9ce37e0663ed8011c1cfd6d0162a5572e94368bac02024485e6a39854aa46fe38e97d6c6b1947cd272d86b06bb5b2f78b9b68d559d", + "227b79ded368153bf46c0a3ca978bfdbef31f3024a5665842468490b0ff748ae04e7832ed4c9f49de9b1706709d623e5c8c15e3caecae8d5e433430ff72f20eb", + "5d34f3952f0105eef88ae8b64c6ce95ebfade0e02c69b08762a8712d2e4911ad3f941fc4034dc9b2e479fdbcd279b902faf5d838bb2e0c6495d372b5b7029813", + "7f939bf8353abce49e77f14f3750af20b7b03902e1a1e7fb6aaf76d0259cd401a83190f15640e74f3e6c5a90e839c7821f6474757f75c7bf9002084ddc7a62dc", + "062b61a2f9a33a71d7d0a06119644c70b0716a504de7e5e1be49bd7b86e7ed6817714f9f0fc313d06129597e9a2235ec8521de36f7290a90ccfc1ffa6d0aee29", + "f29e01eeae64311eb7f1c6422f946bf7bea36379523e7b2bbaba7d1d34a22d5ea5f1c5a09d5ce1fe682cced9a4798d1a05b46cd72dff5c1b355440b2a2d476bc", + "ec38cd3bbab3ef35d7cb6d5c914298351d8a9dc97fcee051a8a02f58e3ed6184d0b7810a5615411ab1b95209c3c810114fdeb22452084e77f3f847c6dbaafe16", + "c2aef5e0ca43e82641565b8cb943aa8ba53550caef793b6532fafad94b816082f0113a3ea2f63608ab40437ecc0f0229cb8fa224dcf1c478a67d9b64162b92d1", + "15f534efff7105cd1c254d074e27d5898b89313b7d366dc2d7d87113fa7d53aae13f6dba487ad8103d5e854c91fdb6e1e74b2ef6d1431769c30767dde067a35c", + "89acbca0b169897a0a2714c2df8c95b5b79cb69390142b7d6018bb3e3076b099b79a964152a9d912b1b86412b7e372e9cecad7f25d4cbab8a317be36492a67d7", + "e3c0739190ed849c9c962fd9dbb55e207e624fcac1eb417691515499eea8d8267b7e8f1287a63633af5011fde8c4ddf55bfdf722edf88831414f2cfaed59cb9a", + "8d6cf87c08380d2d1506eee46fd4222d21d8c04e585fbfd08269c98f702833a156326a0724656400ee09351d57b440175e2a5de93cc5f80db6daf83576cf75fa", + "da24bede383666d563eeed37f6319baf20d5c75d1635a6ba5ef4cfa1ac95487e96f8c08af600aab87c986ebad49fc70a58b4890b9c876e091016daf49e1d322e", + "f9d1d1b1e87ea7ae753a029750cc1cf3d0157d41805e245c5617bb934e732f0ae3180b78e05bfe76c7c3051e3e3ac78b9b50c05142657e1e03215d6ec7bfd0fc", + "11b7bc1668032048aa43343de476395e814bbbc223678db951a1b03a021efac948cfbe215f97fe9a72a2f6bc039e3956bfa417c1a9f10d6d7ba5d3d32ff323e5", + "b8d9000e4fc2b066edb91afee8e7eb0f24e3a201db8b6793c0608581e628ed0bcc4e5aa6787992a4bcc44e288093e63ee83abd0bc3ec6d0934a674a4da13838a", + "ce325e294f9b6719d6b61278276ae06a2564c03bb0b783fafe785bdf89c7d5acd83e78756d301b445699024eaeb77b54d477336ec2a4f332f2b3f88765ddb0c3", + "29acc30e9603ae2fccf90bf97e6cc463ebe28c1b2f9b4b765e70537c25c702a29dcbfbf14c99c54345ba2b51f17b77b5f15db92bbad8fa95c471f5d070a137cc", + "3379cbaae562a87b4c0425550ffdd6bfe1203f0d666cc7ea095be407a5dfe61ee91441cd5154b3e53b4f5fb31ad4c7a9ad5c7af4ae679aa51a54003a54ca6b2d", + "3095a349d245708c7cf550118703d7302c27b60af5d4e67fc978f8a4e60953c7a04f92fcf41aee64321ccb707a895851552b1e37b00bc5e6b72fa5bcef9e3fff", + "07262d738b09321f4dbccec4bb26f48cb0f0ed246ce0b31b9a6e7bc683049f1f3e5545f28ce932dd985c5ab0f43bd6de0770560af329065ed2e49d34624c2cbb", + "b6405eca8ee3316c87061cc6ec18dba53e6c250c63ba1f3bae9e55dd3498036af08cd272aa24d713c6020d77ab2f3919af1a32f307420618ab97e73953994fb4", + "7ee682f63148ee45f6e5315da81e5c6e557c2c34641fc509c7a5701088c38a74756168e2cd8d351e88fd1a451f360a01f5b2580f9b5a2e8cfc138f3dd59a3ffc", + "1d263c179d6b268f6fa016f3a4f29e943891125ed8593c81256059f5a7b44af2dcb2030d175c00e62ecaf7ee96682aa07ab20a611024a28532b1c25b86657902", + "106d132cbdb4cd2597812846e2bc1bf732fec5f0a5f65dbb39ec4e6dc64ab2ce6d24630d0f15a805c3540025d84afa98e36703c3dbee713e72dde8465bc1be7e", + "0e79968226650667a8d862ea8da4891af56a4e3a8b6d1750e394f0dea76d640d85077bcec2cc86886e506751b4f6a5838f7f0b5fef765d9dc90dcdcbaf079f08", + "521156a82ab0c4e566e5844d5e31ad9aaf144bbd5a464fdca34dbd5717e8ff711d3ffebbfa085d67fe996a34f6d3e4e60b1396bf4b1610c263bdbb834d560816", + "1aba88befc55bc25efbce02db8b9933e46f57661baeabeb21cc2574d2a518a3cba5dc5a38e49713440b25f9c744e75f6b85c9d8f4681f676160f6105357b8406", + "5a9949fcb2c473cda968ac1b5d08566dc2d816d960f57e63b898fa701cf8ebd3f59b124d95bfbbedc5f1cf0e17d5eaed0c02c50b69d8a402cabcca4433b51fd4", + "b0cead09807c672af2eb2b0f06dde46cf5370e15a4096b1a7d7cbb36ec31c205fbefca00b7a4162fa89fb4fb3eb78d79770c23f44e7206664ce3cd931c291e5d", + "bb6664931ec97044e45b2ae420ae1c551a8874bc937d08e969399c3964ebdba8346cdd5d09caafe4c28ba7ec788191ceca65ddd6f95f18583e040d0f30d0364d", + "65bc770a5faa3792369803683e844b0be7ee96f29f6d6a35568006bd5590f9a4ef639b7a8061c7b0424b66b60ac34af3119905f33a9d8c3ae18382ca9b689900", + "ea9b4dca333336aaf839a45c6eaa48b8cb4c7ddabffea4f643d6357ea6628a480a5b45f2b052c1b07d1fedca918b6f1139d80f74c24510dcbaa4be70eacc1b06", + "e6342fb4a780ad975d0e24bce149989b91d360557e87994f6b457b895575cc02d0c15bad3ce7577f4c63927ff13f3e381ff7e72bdbe745324844a9d27e3f1c01", + "3e209c9b33e8e461178ab46b1c64b49a07fb745f1c8bc95fbfb94c6b87c69516651b264ef980937fad41238b91ddc011a5dd777c7efd4494b4b6ecd3a9c22ac0", + "fd6a3d5b1875d80486d6e69694a56dbb04a99a4d051f15db2689776ba1c4882e6d462a603b7015dc9f4b7450f05394303b8652cfb404a266962c41bae6e18a94", + "951e27517e6bad9e4195fc8671dee3e7e9be69cee1422cb9fecfce0dba875f7b310b93ee3a3d558f941f635f668ff832d2c1d033c5e2f0997e4c66f147344e02", + "8eba2f874f1ae84041903c7c4253c82292530fc8509550bfdc34c95c7e2889d5650b0ad8cb988e5c4894cb87fbfbb19612ea93ccc4c5cad17158b9763464b492", + "16f712eaa1b7c6354719a8e7dbdfaf55e4063a4d277d947550019b38dfb564830911057d50506136e2394c3b28945cc964967d54e3000c2181626cfb9b73efd2", + "c39639e7d5c7fb8cdd0fd3e6a52096039437122f21c78f1679cea9d78a734c56ecbeb28654b4f18e342c331f6f7229ec4b4bc281b2d80a6eb50043f31796c88c", + "72d081af99f8a173dcc9a0ac4eb3557405639a29084b54a40172912a2f8a395129d5536f0918e902f9e8fa6000995f4168ddc5f893011be6a0dbc9b8a1a3f5bb", + "c11aa81e5efd24d5fc27ee586cfd8847fbb0e27601ccece5ecca0198e3c7765393bb74457c7e7a27eb9170350e1fb53857177506be3e762cc0f14d8c3afe9077", + "c28f2150b452e6c0c424bcde6f8d72007f9310fed7f2f87de0dbb64f4479d6c1441ba66f44b2accee61609177ed340128b407ecec7c64bbe50d63d22d8627727", + "f63d88122877ec30b8c8b00d22e89000a966426112bd44166e2f525b769ccbe9b286d437a0129130dde1a86c43e04bedb594e671d98283afe64ce331de9828fd", + "348b0532880b88a6614a8d7408c3f913357fbb60e995c60205be9139e74998aede7f4581e42f6b52698f7fa1219708c14498067fd1e09502de83a77dd281150c", + "5133dc8bef725359dff59792d85eaf75b7e1dcd1978b01c35b1b85fcebc63388ad99a17b6346a217dc1a9622ebd122ecf6913c4d31a6b52a695b86af00d741a0", + "2753c4c0e98ecad806e88780ec27fccd0f5c1ab547f9e4bf1659d192c23aa2cc971b58b6802580baef8adc3b776ef7086b2545c2987f348ee3719cdef258c403", + "b1663573ce4b9d8caefc865012f3e39714b9898a5da6ce17c25a6a47931a9ddb9bbe98adaa553beed436e89578455416c2a52a525cf2862b8d1d49a2531b7391", + "64f58bd6bfc856f5e873b2a2956ea0eda0d6db0da39c8c7fc67c9f9feefcff3072cdf9e6ea37f69a44f0c61aa0da3693c2db5b54960c0281a088151db42b11e8", + "0764c7be28125d9065c4b98a69d60aede703547c66a12e17e1c618994132f5ef82482c1e3fe3146cc65376cc109f0138ed9a80e49f1f3c7d610d2f2432f20605", + "f748784398a2ff03ebeb07e155e66116a839741a336e32da71ec696001f0ad1b25cd48c69cfca7265eca1dd71904a0ce748ac4124f3571076dfa7116a9cf00e9", + "3f0dbc0186bceb6b785ba78d2a2a013c910be157bdaffae81bb6663b1a73722f7f1228795f3ecada87cf6ef0078474af73f31eca0cc200ed975b6893f761cb6d", + "d4762cd4599876ca75b2b8fe249944dbd27ace741fdab93616cbc6e425460feb51d4e7adcc38180e7fc47c89024a7f56191adb878dfde4ead62223f5a2610efe", + "cd36b3d5b4c91b90fcbba79513cfee1907d8645a162afd0cd4cf4192d4a5f4c892183a8eacdb2b6b6a9d9aa8c11ac1b261b380dbee24ca468f1bfd043c58eefe", + "98593452281661a53c48a9d8cd790826c1a1ce567738053d0bee4a91a3d5bd92eefdbabebe3204f2031ca5f781bda99ef5d8ae56e5b04a9e1ecd21b0eb05d3e1", + "771f57dd2775ccdab55921d3e8e30ccf484d61fe1c1b9c2ae819d0fb2a12fab9be70c4a7a138da84e8280435daade5bbe66af0836a154f817fb17f3397e725a3", + "c60897c6f828e21f16fbb5f15b323f87b6c8955eabf1d38061f707f608abdd993fac3070633e286cf8339ce295dd352df4b4b40b2f29da1dd50b3a05d079e6bb", + "8210cd2c2d3b135c2cf07fa0d1433cd771f325d075c6469d9c7f1ba0943cd4ab09808cabf4acb9ce5bb88b498929b4b847f681ad2c490d042db2aec94214b06b", + "1d4edfffd8fd80f7e4107840fa3aa31e32598491e4af7013c197a65b7f36dd3ac4b478456111cd4309d9243510782fa31b7c4c95fa951520d020eb7e5c36e4ef", + "af8e6e91fab46ce4873e1a50a8ef448cc29121f7f74deef34a71ef89cc00d9274bc6c2454bbb3230d8b2ec94c62b1dec85f3593bfa30ea6f7a44d7c09465a253", + "29fd384ed4906f2d13aa9fe7af905990938bed807f1832454a372ab412eea1f5625a1fcc9ac8343b7c67c5aba6e0b1cc4644654913692c6b39eb9187ceacd3ec", + "a268c7885d9874a51c44dffed8ea53e94f78456e0b2ed99ff5a3924760813826d960a15edbedbb5de5226ba4b074e71b05c55b9756bb79e55c02754c2c7b6c8a", + "0cf8545488d56a86817cd7ecb10f7116b7ea530a45b6ea497b6c72c997e09e3d0da8698f46bb006fc977c2cd3d1177463ac9057fdd1662c85d0c126443c10473", + "b39614268fdd8781515e2cfebf89b4d5402bab10c226e6344e6b9ae000fb0d6c79cb2f3ec80e80eaeb1980d2f8698916bd2e9f747236655116649cd3ca23a837", + "74bef092fc6f1e5dba3663a3fb003b2a5ba257496536d99f62b9d73f8f9eb3ce9ff3eec709eb883655ec9eb896b9128f2afc89cf7d1ab58a72f4a3bf034d2b4a", + "3a988d38d75611f3ef38b8774980b33e573b6c57bee0469ba5eed9b44f29945e7347967fba2c162e1c3be7f310f2f75ee2381e7bfd6b3f0baea8d95dfb1dafb1", + "58aedfce6f67ddc85a28c992f1c0bd0969f041e66f1ee88020a125cbfcfebcd61709c9c4eba192c15e69f020d462486019fa8dea0cd7a42921a19d2fe546d43d", + "9347bd291473e6b4e368437b8e561e065f649a6d8ada479ad09b1999a8f26b91cf6120fd3bfe014e83f23acfa4c0ad7b3712b2c3c0733270663112ccd9285cd9", + "b32163e7c5dbb5f51fdc11d2eac875efbbcb7e7699090a7e7ff8a8d50795af5d74d9ff98543ef8cdf89ac13d0485278756e0ef00c817745661e1d59fe38e7537", + "1085d78307b1c4b008c57a2e7e5b234658a0a82e4ff1e4aaac72b312fda0fe27d233bc5b10e9cc17fdc7697b540c7d95eb215a19a1a0e20e1abfa126efd568c7", + "4e5c734c7dde011d83eac2b7347b373594f92d7091b9ca34cb9c6f39bdf5a8d2f134379e16d822f6522170ccf2ddd55c84b9e6c64fc927ac4cf8dfb2a17701f2", + "695d83bd990a1117b3d0ce06cc888027d12a054c2677fd82f0d4fbfc93575523e7991a5e35a3752e9b70ce62992e268a877744cdd435f5f130869c9a2074b338", + "a6213743568e3b3158b9184301f3690847554c68457cb40fc9a4b8cfd8d4a118c301a07737aeda0f929c68913c5f51c80394f53bff1c3e83b2e40ca97eba9e15", + "d444bfa2362a96df213d070e33fa841f51334e4e76866b8139e8af3bb3398be2dfaddcbc56b9146de9f68118dc5829e74b0c28d7711907b121f9161cb92b69a9", + "142709d62e28fcccd0af97fad0f8465b971e82201dc51070faa0372aa43e92484be1c1e73ba10906d5d1853db6a4106e0a7bf9800d373d6dee2d46d62ef2a461", +}