update the blake2b implementation (#3724)

Fixes a performance bug caused by SSE-AVX register savings on amd64.
This commit is contained in:
Andreas Auernhammer 2017-02-10 00:01:00 +01:00 committed by Harshavardhana
parent c9b1468c3b
commit f38222c0cc
22 changed files with 1955 additions and 4155 deletions

View file

@ -24,8 +24,8 @@ import (
"sync"
"github.com/klauspost/reedsolomon"
"github.com/minio/blake2b-simd"
"github.com/minio/sha256-simd"
"golang.org/x/crypto/blake2b"
)
// newHashWriters - inititialize a slice of hashes for the disk count.
@ -48,14 +48,14 @@ func newHash(algo string) (h hash.Hash) {
// ignore the error, because New512 without a key never fails
// New512 only returns a non-nil error, if the length of the passed
// key > 64 bytes - but we use blake2b as hash function (no key)
h = blake2b.New512()
h, _ = blake2b.New512(nil)
// Add new hashes here.
default:
// Default to blake2b.
// ignore the error, because New512 without a key never fails
// New512 only returns a non-nil error, if the length of the passed
// key > 64 bytes - but we use blake2b as hash function (no key)
h = blake2b.New512()
h, _ = blake2b.New512(nil)
}
return h
}

View file

@ -1,301 +0,0 @@
// Written in 2012 by Dmitry Chestnykh.
//
// To the extent possible under law, the author have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// http://creativecommons.org/publicdomain/zero/1.0/
// Package blake2b implements BLAKE2b cryptographic hash function.
package blake2b
import (
"encoding/binary"
"errors"
"hash"
)
const (
BlockSize = 128 // block size of algorithm
Size = 64 // maximum digest size
SaltSize = 16 // maximum salt size
PersonSize = 16 // maximum personalization string size
KeySize = 64 // maximum size of key
)
type digest struct {
h [8]uint64 // current chain value
t [2]uint64 // message bytes counter
f [2]uint64 // finalization flags
x [BlockSize]byte // buffer for data not yet compressed
nx int // number of bytes in buffer
ih [8]uint64 // initial chain value (after config)
paddedKey [BlockSize]byte // copy of key, padded with zeros
isKeyed bool // indicates whether hash was keyed
size uint8 // digest size in bytes
isLastNode bool // indicates processing of the last node in tree hashing
}
// Initialization values.
var iv = [8]uint64{
0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f,
0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
}
// Config is used to configure hash function parameters and keying.
// All parameters are optional.
type Config struct {
Size uint8 // digest size (if zero, default size of 64 bytes is used)
Key []byte // key for prefix-MAC
Salt []byte // salt (if < 16 bytes, padded with zeros)
Person []byte // personalization (if < 16 bytes, padded with zeros)
Tree *Tree // parameters for tree hashing
}
// Tree represents parameters for tree hashing.
type Tree struct {
Fanout uint8 // fanout
MaxDepth uint8 // maximal depth
LeafSize uint32 // leaf maximal byte length (0 for unlimited)
NodeOffset uint64 // node offset (0 for first, leftmost or leaf)
NodeDepth uint8 // node depth (0 for leaves)
InnerHashSize uint8 // inner hash byte length
IsLastNode bool // indicates processing of the last node of layer
}
var (
defaultConfig = &Config{Size: Size}
config256 = &Config{Size: 32}
)
func verifyConfig(c *Config) error {
if c.Size > Size {
return errors.New("digest size is too large")
}
if len(c.Key) > KeySize {
return errors.New("key is too large")
}
if len(c.Salt) > SaltSize {
// Smaller salt is okay: it will be padded with zeros.
return errors.New("salt is too large")
}
if len(c.Person) > PersonSize {
// Smaller personalization is okay: it will be padded with zeros.
return errors.New("personalization is too large")
}
if c.Tree != nil {
if c.Tree.Fanout == 1 {
return errors.New("fanout of 1 is not allowed in tree mode")
}
if c.Tree.MaxDepth < 2 {
return errors.New("incorrect tree depth")
}
if c.Tree.InnerHashSize < 1 || c.Tree.InnerHashSize > Size {
return errors.New("incorrect tree inner hash size")
}
}
return nil
}
// New returns a new hash.Hash configured with the given Config.
// Config can be nil, in which case the default one is used, calculating 64-byte digest.
// Returns non-nil error if Config contains invalid parameters.
func New(c *Config) (hash.Hash, error) {
if c == nil {
c = defaultConfig
} else {
if c.Size == 0 {
// Set default size if it's zero.
c.Size = Size
}
if err := verifyConfig(c); err != nil {
return nil, err
}
}
d := new(digest)
d.initialize(c)
return d, nil
}
// initialize initializes digest with the given
// config, which must be non-nil and verified.
func (d *digest) initialize(c *Config) {
// Create parameter block.
var p [BlockSize]byte
p[0] = c.Size
p[1] = uint8(len(c.Key))
if c.Salt != nil {
copy(p[32:], c.Salt)
}
if c.Person != nil {
copy(p[48:], c.Person)
}
if c.Tree != nil {
p[2] = c.Tree.Fanout
p[3] = c.Tree.MaxDepth
binary.LittleEndian.PutUint32(p[4:], c.Tree.LeafSize)
binary.LittleEndian.PutUint64(p[8:], c.Tree.NodeOffset)
p[16] = c.Tree.NodeDepth
p[17] = c.Tree.InnerHashSize
} else {
p[2] = 1
p[3] = 1
}
// Initialize.
d.size = c.Size
for i := 0; i < 8; i++ {
d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(p[i*8:])
}
if c.Tree != nil && c.Tree.IsLastNode {
d.isLastNode = true
}
// Process key.
if c.Key != nil {
copy(d.paddedKey[:], c.Key)
d.Write(d.paddedKey[:])
d.isKeyed = true
}
// Save a copy of initialized state.
copy(d.ih[:], d.h[:])
}
// New512 returns a new hash.Hash computing the BLAKE2b 64-byte checksum.
func New512() hash.Hash {
d := new(digest)
d.initialize(defaultConfig)
return d
}
// New256 returns a new hash.Hash computing the BLAKE2b 32-byte checksum.
func New256() hash.Hash {
d := new(digest)
d.initialize(config256)
return d
}
// NewMAC returns a new hash.Hash computing BLAKE2b prefix-
// Message Authentication Code of the given size in bytes
// (up to 64) with the given key (up to 64 bytes in length).
func NewMAC(outBytes uint8, key []byte) hash.Hash {
d, err := New(&Config{Size: outBytes, Key: key})
if err != nil {
panic(err.Error())
}
return d
}
// Reset resets the state of digest to the initial state
// after configuration and keying.
func (d *digest) Reset() {
copy(d.h[:], d.ih[:])
d.t[0] = 0
d.t[1] = 0
d.f[0] = 0
d.f[1] = 0
d.nx = 0
if d.isKeyed {
d.Write(d.paddedKey[:])
}
}
// Size returns the digest size in bytes.
func (d *digest) Size() int { return int(d.size) }
// BlockSize returns the algorithm block size in bytes.
func (d *digest) BlockSize() int { return BlockSize }
func (d *digest) Write(p []byte) (nn int, err error) {
nn = len(p)
left := BlockSize - d.nx
if len(p) > left {
// Process buffer.
copy(d.x[d.nx:], p[:left])
p = p[left:]
compress(d, d.x[:])
d.nx = 0
}
// Process full blocks except for the last one.
if len(p) > BlockSize {
n := len(p) &^ (BlockSize - 1)
if n == len(p) {
n -= BlockSize
}
compress(d, p[:n])
p = p[n:]
}
// Fill buffer.
d.nx += copy(d.x[d.nx:], p)
return
}
// Sum returns the calculated checksum.
func (d *digest) Sum(in []byte) []byte {
// Make a copy of d so that caller can keep writing and summing.
d0 := *d
hash := d0.checkSum()
return append(in, hash[:d0.size]...)
}
func (d *digest) checkSum() [Size]byte {
// Do not create unnecessary copies of the key.
if d.isKeyed {
for i := 0; i < len(d.paddedKey); i++ {
d.paddedKey[i] = 0
}
}
dec := BlockSize - uint64(d.nx)
if d.t[0] < dec {
d.t[1]--
}
d.t[0] -= dec
// Pad buffer with zeros.
for i := d.nx; i < len(d.x); i++ {
d.x[i] = 0
}
// Set last block flag.
d.f[0] = 0xffffffffffffffff
if d.isLastNode {
d.f[1] = 0xffffffffffffffff
}
// Compress last block.
compress(d, d.x[:])
var out [Size]byte
j := 0
for _, s := range d.h[:(d.size-1)/8+1] {
out[j+0] = byte(s >> 0)
out[j+1] = byte(s >> 8)
out[j+2] = byte(s >> 16)
out[j+3] = byte(s >> 24)
out[j+4] = byte(s >> 32)
out[j+5] = byte(s >> 40)
out[j+6] = byte(s >> 48)
out[j+7] = byte(s >> 56)
j += 8
}
return out
}
// Sum512 returns a 64-byte BLAKE2b hash of data.
func Sum512(data []byte) [64]byte {
var d digest
d.initialize(defaultConfig)
d.Write(data)
return d.checkSum()
}
// Sum256 returns a 32-byte BLAKE2b hash of data.
func Sum256(data []byte) (out [32]byte) {
var d digest
d.initialize(config256)
d.Write(data)
sum := d.checkSum()
copy(out[:], sum[:32])
return
}

View file

@ -1,47 +0,0 @@
//+build !noasm
//+build !appengine
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
//go:noescape
func compressAVX2Loop(p []uint8, in, iv, t, f, shffle, out []uint64)
func compressAVX2(d *digest, p []uint8) {
var (
in [8]uint64
out [8]uint64
shffle [8]uint64
)
// vector for PSHUFB instruction
shffle[0] = 0x0201000706050403
shffle[1] = 0x0a09080f0e0d0c0b
shffle[2] = 0x0201000706050403
shffle[3] = 0x0a09080f0e0d0c0b
shffle[4] = 0x0100070605040302
shffle[5] = 0x09080f0e0d0c0b0a
shffle[6] = 0x0100070605040302
shffle[7] = 0x09080f0e0d0c0b0a
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
compressAVX2Loop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
}

View file

@ -1,671 +0,0 @@
//+build !noasm !appengine
//
// Minio Cloud Storage, (C) 2016 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
// Based on AVX2 implementation from https://github.com/sneves/blake2-avx2/blob/master/blake2b-common.h
//
// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent
//
// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as:
// #define ROUND(r) \
// LOAD_MSG_ ##r ##_1(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_2(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
// LOAD_MSG_ ##r ##_3(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_4(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
//
// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go
//
// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly)
//
// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and
// rounds 2 & 12 are identical)
//
#define G1 \
\ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc4 \ // VPADDQ YMM0,YMM0,YMM4 /* v0 += m[0], v1 += m[2], v2 += m[4], v3 += m[6] */
BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc1 \ // VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */
BYTE $0xc5; BYTE $0xe5; BYTE $0xef; BYTE $0xd8 \ // VPXOR YMM3,YMM3,YMM0 /* v12 ^= v0, v13 ^= v1, v14 ^= v2, v15 ^= v3 */
BYTE $0xc5; BYTE $0xfd; BYTE $0x70; BYTE $0xdb; BYTE $0xb1 \ // VPSHUFD YMM3,YMM3,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = */
BYTE $0xc5; BYTE $0xed; BYTE $0xd4; BYTE $0xd3 \ // VPADDQ YMM2,YMM2,YMM3 /* v8 += v12, v9 += v13, v10 += v14, v11 += v15 */
BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xca \ // VPXOR YMM1,YMM1,YMM2 /* v4 ^= v8, v5 ^= v9, v6 ^= v10, v7 ^= v11 */
BYTE $0xc4; BYTE $0xe2; BYTE $0x75; BYTE $0x00; BYTE $0xce // VPSHUFB YMM1,YMM1,YMM6 /* v4 = v4<<(64-24) | v4>>24, ..., ..., v7 = v7<<(64-24) | v7>>24 */
#define G2 \
BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc5 \ // VPADDQ YMM0,YMM0,YMM5 /* v0 += m[1], v1 += m[3], v2 += m[5], v3 += m[7] */
BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc1 \ // VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */
BYTE $0xc5; BYTE $0xe5; BYTE $0xef; BYTE $0xd8 \ // VPXOR YMM3,YMM3,YMM0 /* v12 ^= v0, v13 ^= v1, v14 ^= v2, v15 ^= v3 */
BYTE $0xc4; BYTE $0xe2; BYTE $0x65; BYTE $0x00; BYTE $0xdf \ // VPSHUFB YMM3,YMM3,YMM7 /* v12 = v12<<(64-16) | v12>>16, ..., ..., v15 = v15<<(64-16) | v15>>16 */
BYTE $0xc5; BYTE $0xed; BYTE $0xd4; BYTE $0xd3 \ // VPADDQ YMM2,YMM2,YMM3 /* v8 += v12, v9 += v13, v10 += v14, v11 += v15 */
BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xca \ // VPXOR YMM1,YMM1,YMM2 /* v4 ^= v8, v5 ^= v9, v6 ^= v10, v7 ^= v11 */
BYTE $0xc5; BYTE $0x75; BYTE $0xd4; BYTE $0xf9 \ // VPADDQ YMM15,YMM1,YMM1 /* temp reg = reg*2 */
BYTE $0xc5; BYTE $0xf5; BYTE $0x73; BYTE $0xd1; BYTE $0x3f \ // VPSRLQ YMM1,YMM1,0x3f /* reg = reg>>63 */
BYTE $0xc4; BYTE $0xc1; BYTE $0x75; BYTE $0xef; BYTE $0xcf // VPXOR YMM1,YMM1,YMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */
#define DIAGONALIZE \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb \ // VPERMQ YMM3, YMM3, 0x93
BYTE $0x93 \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2 \ // VPERMQ YMM2, YMM2, 0x4e
BYTE $0x4e \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 \ // VPERMQ YMM1, YMM1, 0x39
BYTE $0x39 \
// DO NOT DELETE -- macro delimiter (previous line extended)
#define UNDIAGONALIZE \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb \ // VPERMQ YMM3, YMM3, 0x39
BYTE $0x39 \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2 \ // VPERMQ YMM2, YMM2, 0x4e
BYTE $0x4e \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 \ // VPERMQ YMM1, YMM1, 0x93
BYTE $0x93 \
// DO NOT DELETE -- macro delimiter (previous line extended)
#define LOAD_SHUFFLE \
MOVQ shffle+120(FP), SI \ // SI: &shuffle
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x36 \ // VMOVDQU YMM6, [rsi]
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x7e; BYTE $0x20 // VMOVDQU YMM7, 32[rsi]
// func compressAVX2Loop(compressSSE(p []uint8, in, iv, t, f, shffle, out []uint64)
TEXT ·compressAVX2Loop(SB), 7, $0
// REGISTER USE
// Y0 - Y3: v0 - v15
// Y4 - Y5: m[0] - m[7]
// Y6 - Y7: shuffle value
// Y8 - Y9: temp registers
// Y10 -Y13: copy of full message
// Y15: temp register
// Load digest
MOVQ in+24(FP), SI // SI: &in
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x06 // VMOVDQU YMM0, [rsi]
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x4e; BYTE $0x20 // VMOVDQU YMM1, 32[rsi]
// Already store digest into &out (so we can reload it later generically)
MOVQ out+144(FP), SI // SI: &out
BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x06 // VMOVDQU [rsi], YMM0
BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x4e; BYTE $0x20 // VMOVDQU 32[rsi], YMM1
// Initialize message pointer and loop counter
MOVQ message+0(FP), DX // DX: &p (message)
MOVQ message_len+8(FP), R8 // R8: len(message)
SHRQ $7, R8 // len(message) / 128
CMPQ R8, $0
JEQ complete
loop:
// Increment counter
MOVQ t+72(FP), SI // SI: &t
MOVQ 0(SI), R9 //
ADDQ $128, R9 // /* d.t[0] += BlockSize */
MOVQ R9, 0(SI) //
CMPQ R9, $128 // /* if d.t[0] < BlockSize { */
JGE noincr //
MOVQ 8(SI), R9 //
ADDQ $1, R9 // /* d.t[1]++ */
MOVQ R9, 8(SI) //
noincr: // /* } */
// Load initialization vector
MOVQ iv+48(FP), SI // SI: &iv
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x16 // VMOVDQU YMM2, [rsi]
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x5e; BYTE $0x20 // VMOVDQU YMM3, 32[rsi]
MOVQ t+72(FP), SI // SI: &t
BYTE $0xc4; BYTE $0x63; BYTE $0x3d; BYTE $0x38; BYTE $0x06 // VINSERTI128 YMM8, YMM8, [rsi], 0 /* Y8 = t[0]+t[1] */
BYTE $0x00
MOVQ t+96(FP), SI // SI: &f
BYTE $0xc4; BYTE $0x63; BYTE $0x3d; BYTE $0x38; BYTE $0x06 // VINSERTI128 YMM8, YMM8, [rsi], 1 /* Y8 = t[0]+t[1]+f[0]+f[1] */
BYTE $0x01
BYTE $0xc4; BYTE $0xc1; BYTE $0x65; BYTE $0xef; BYTE $0xd8 // VPXOR YMM3,YMM3,YMM8 /* Y3 = Y3 ^ Y8 */
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x12 // VMOVDQU YMM10, [rdx] /* Y10 = m[0]+ m[1]+ m[2]+ m[3] */
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x5a; BYTE $0x20 // VMOVDQU YMM11, 32[rdx] /* Y11 = m[4]+ m[5]+ m[6]+ m[7] */
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x62; BYTE $0x40 // VMOVDQU YMM12, 64[rdx] /* Y12 = m[8]+ m[9]+m[10]+m[11] */
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x6a; BYTE $0x60 // VMOVDQU YMM13, 96[rdx] /* Y13 = m[12]+m[13]+m[14]+m[15] */
LOAD_SHUFFLE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6c; BYTE $0xe3 // VPUNPCKLQDQ YMM4, YMM10, YMM11 /* m[0], m[4], m[2], m[6] */
BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6d; BYTE $0xeb // VPUNPCKHQDQ YMM5, YMM10, YMM11 /* m[1], m[5], m[3], m[7] */
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6c; BYTE $0xe5 // VPUNPCKLQDQ YMM4, YMM12, YMM13 /* m[8], m[12], m[10], m[14] */
BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6d; BYTE $0xed // VPUNPCKHQDQ YMM5, YMM12, YMM13 /* m[9], m[13], m[11], m[15] */
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 2
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 /* m[4], ____, ____, m[14] */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x03 /* m[14], m[4], ____, ____ */ /* xxxx 0011 = 0x03 */
BYTE $0x03
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM12, YMM13 /* m[9], m[13], ____, ____ */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[9], m[13], ____, ____ */ /* 0010 0000 = 0x20 */
BYTE $0x20
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x02 /* m[10], m[8], ____, ____ */ /* xxxx 0010 = 0x02 */
BYTE $0x02
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x30 /* ____, ____, m[15], ____ */ /* xx11 xxxx = 0x30 */
BYTE $0x30
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM9, YMM11 /* ____, ____, m[15], m[6] */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */
BYTE $0x30
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x01 /* m[1], m[0], ____, ____ */ /* xxxx 0001 = 0x01 */
BYTE $0x01
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 /* m[5], ____, ____, m[11] */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x03 /* m[11], m[5], ____, ____ */ /* xxxx 0011 = 0x03 */
BYTE $0x03
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[1], m[0], m[11], m[5] */ /* 0010 0000 = 0x20 */
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM10, YMM13 /* ___, m[12], m[2], ____ */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x09 /* m[12], m[2], ____, ____ */ /* xxxx 1001 = 0x09 */
BYTE $0x09
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 /* ____, ____, m[7], m[3] */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */
BYTE $0x30
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 3
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM11, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21
BYTE $0x21
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc2 // VPUNPCKLQDQ YMM8, YMM12, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30
BYTE $0x30
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM12, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM13, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 4
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM11, YMM10
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM13, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21
BYTE $0x21
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM12, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x08
BYTE $0x08
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20
BYTE $0x20
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21
BYTE $0x21
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc4 // VPUNPCKLQDQ YMM8, YMM11, YMM12
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 5
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM12, YMM11
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30
BYTE $0x30
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20
BYTE $0x20
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM13, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM10, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM12, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20
BYTE $0x20
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 6
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21
BYTE $0x21
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc4 // VPUNPCKLQDQ YMM8, YMM13, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM12, YMM10
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30
BYTE $0x30
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM13, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30
BYTE $0x30
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM13, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM13, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30
BYTE $0x30
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 7
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM13, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM13, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30
BYTE $0x30
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc5 // VPUNPCKHQDQ YMM8, YMM11, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xaa
BYTE $0xaa
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM13, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20
BYTE $0x20
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x01
BYTE $0x01
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM11, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31
BYTE $0x31
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 8
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM13, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM13, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0xaa
BYTE $0xaa
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM10, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc5 // VPUNPCKHQDQ YMM8, YMM11, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xca // VPUNPCKLQDQ YMM9, YMM12, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM11, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30
BYTE $0x30
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 9
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM12, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc4 // VPUNPCKHQDQ YMM8, YMM13, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31
BYTE $0x31
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xaa
BYTE $0xaa
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x15; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM13, YMM9, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x04
BYTE $0x04
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 10
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31
BYTE $0x31
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc4 // VPUNPCKHQDQ YMM8, YMM13, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM10, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0xaa
BYTE $0xaa
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xca // VPUNPCKLQDQ YMM9, YMM13, YMM10
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 1
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6c; BYTE $0xe3 // VPUNPCKLQDQ YMM4, YMM10, YMM11 /* m[0], m[4], m[2], m[6] */
BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6d; BYTE $0xeb // VPUNPCKHQDQ YMM5, YMM10, YMM11 /* m[1], m[5], m[3], m[7] */
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6c; BYTE $0xe5 // VPUNPCKLQDQ YMM4, YMM12, YMM13 /* m[8], m[12], m[10], m[14] */
BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6d; BYTE $0xed // VPUNPCKHQDQ YMM5, YMM12, YMM13 /* m[9], m[13], m[11], m[15] */
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 2
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 /* m[4], ____, ____, m[14] */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x03 /* m[14], m[4], ____, ____ */ /* xxxx 0011 = 0x03 */
BYTE $0x03
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM12, YMM13 /* m[9], m[13], ____, ____ */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[9], m[13], ____, ____ */ /* 0010 0000 = 0x20 */
BYTE $0x20
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x02 /* m[10], m[8], ____, ____ */ /* xxxx 0010 = 0x02 */
BYTE $0x02
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x30 /* ____, ____, m[15], ____ */ /* xx11 xxxx = 0x30 */
BYTE $0x30
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM9, YMM11 /* ____, ____, m[15], m[6] */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */
BYTE $0x30
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x01 /* m[1], m[0], ____, ____ */ /* xxxx 0001 = 0x01 */
BYTE $0x01
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 /* m[5], ____, ____, m[11] */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x03 /* m[11], m[5], ____, ____ */ /* xxxx 0011 = 0x03 */
BYTE $0x03
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[1], m[0], m[11], m[5] */ /* 0010 0000 = 0x20 */
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM10, YMM13 /* ___, m[12], m[2], ____ */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x09 /* m[12], m[2], ____, ____ */ /* xxxx 1001 = 0x09 */
BYTE $0x09
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 /* ____, ____, m[7], m[3] */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */
BYTE $0x30
G1
G2
UNDIAGONALIZE
// Reload digest (most current value store in &out)
MOVQ out+144(FP), SI // SI: &in
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x26 // VMOVDQU YMM12, [rsi]
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x6e; BYTE $0x20 // VMOVDQU YMM13, 32[rsi]
BYTE $0xc5; BYTE $0xfd; BYTE $0xef; BYTE $0xc2 // VPXOR YMM0,YMM0,YMM2 /* X0 = X0 ^ X4, X1 = X1 ^ X5 */
BYTE $0xc4; BYTE $0xc1; BYTE $0x7d; BYTE $0xef; BYTE $0xc4 // VPXOR YMM0,YMM0,YMM12 /* X0 = X0 ^ X12, X1 = X1 ^ X13 */
BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xcb // VPXOR YMM1,YMM1,YMM3 /* X2 = X2 ^ X6, X3 = X3 ^ X7 */
BYTE $0xc4; BYTE $0xc1; BYTE $0x75; BYTE $0xef; BYTE $0xcd // VPXOR YMM1,YMM1,YMM13 /* X2 = X2 ^ X14, X3 = X3 ^ X15 */
// Store digest into &out
MOVQ out+144(FP), SI // SI: &out
BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x06 // VMOVDQU [rsi], YMM0
BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x4e; BYTE $0x20 // VMOVDQU 32[rsi], YMM1
// Increment message pointer and check if there's more to do
ADDQ $128, DX // message += 128
SUBQ $1, R8
JNZ loop
complete:
BYTE $0xc5; BYTE $0xf8; BYTE $0x77 // VZEROUPPER /* Prevent further context switches */
RET

View file

@ -1,41 +0,0 @@
//+build !noasm
//+build !appengine
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
//go:noescape
func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64)
func compressAVX(d *digest, p []uint8) {
var (
in [8]uint64
out [8]uint64
shffle [2]uint64
)
// vector for PSHUFB instruction
shffle[0] = 0x0201000706050403
shffle[1] = 0x0a09080f0e0d0c0b
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
blockAVXLoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
}

View file

@ -1,682 +0,0 @@
//+build !noasm !appengine
//
// Minio Cloud Storage, (C) 2016 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
// Based on SSE implementation from https://github.com/BLAKE2/BLAKE2/blob/master/sse/blake2b.c
//
// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent
//
// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as:
// #define ROUND(r) \
// LOAD_MSG_ ##r ##_1(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_2(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
// LOAD_MSG_ ##r ##_3(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_4(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
//
// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go
//
// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly)
//
// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and
// rounds 2 & 12 are identical)
//
#define G1 \
\ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
LONG $0xd479c1c4; BYTE $0xc0 \ // VPADDQ XMM0,XMM0,XMM8 /* v0 += m[0], v1 += m[2] */
LONG $0xd471c1c4; BYTE $0xc9 \ // VPADDQ XMM1,XMM1,XMM9 /* v2 += m[4], v3 += m[6] */
LONG $0xc2d4f9c5 \ // VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */
LONG $0xcbd4f1c5 \ // VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */
LONG $0xf0efc9c5 \ // VPXOR XMM6,XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */
LONG $0xf9efc1c5 \ // VPXOR XMM7,XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */
LONG $0xf670f9c5; BYTE $0xb1 \ // VPSHUFD XMM6,XMM6,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = v13<<(64-32) | v13>>32 */
LONG $0xff70f9c5; BYTE $0xb1 \ // VPSHUFD XMM7,XMM7,0xb1 /* v14 = v14<<(64-32) | v14>>32, v15 = v15<<(64-32) | v15>>32 */
LONG $0xe6d4d9c5 \ // VPADDQ XMM4,XMM4,XMM6 /* v8 += v12, v9 += v13 */
LONG $0xefd4d1c5 \ // VPADDQ XMM5,XMM5,XMM7 /* v10 += v14, v11 += v15 */
LONG $0xd4efe9c5 \ // VPXOR XMM2,XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */
LONG $0xddefe1c5 \ // VPXOR XMM3,XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */
LONG $0x0069c2c4; BYTE $0xd4 \ // VPSHUFB XMM2,XMM2,XMM12 /* v4 = v4<<(64-24) | v4>>24, v5 = v5<<(64-24) | v5>>24 */
LONG $0x0061c2c4; BYTE $0xdc // VPSHUFB XMM3,XMM3,XMM12 /* v6 = v6<<(64-24) | v6>>24, v7 = v7<<(64-24) | v7>>24 */
#define G2 \
\ // G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
LONG $0xd479c1c4; BYTE $0xc2 \ // VPADDQ XMM0,XMM0,XMM10 /* v0 += m[1], v1 += m[3] */
LONG $0xd471c1c4; BYTE $0xcb \ // VPADDQ XMM1,XMM1,XMM11 /* v2 += m[5], v3 += m[7] */
LONG $0xc2d4f9c5 \ // VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */
LONG $0xcbd4f1c5 \ // VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */
LONG $0xf0efc9c5 \ // VPXOR XMM6,XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */
LONG $0xf9efc1c5 \ // VPXOR XMM7,XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */
LONG $0xf670fbc5; BYTE $0x39 \ // VPSHUFLW XMM6,XMM6,0x39 /* combined with next ... */
LONG $0xf670fac5; BYTE $0x39 \ // VPSHUFHW XMM6,XMM6,0x39 /* v12 = v12<<(64-16) | v12>>16, v13 = v13<<(64-16) | v13>>16 */
LONG $0xff70fbc5; BYTE $0x39 \ // VPSHUFLW XMM7,XMM7,0x39 /* combined with next ... */
LONG $0xff70fac5; BYTE $0x39 \ // VPSHUFHW XMM7,XMM7,0x39 /* v14 = v14<<(64-16) | v14>>16, v15 = v15<<(64-16) | v15>>16 */
LONG $0xe6d4d9c5 \ // VPADDQ XMM4,XMM4,XMM6 /* v8 += v12, v9 += v13 */
LONG $0xefd4d1c5 \ // VPADDQ XMM5,XMM5,XMM7 /* v10 += v14, v11 += v15 */
LONG $0xd4efe9c5 \ // VPXOR XMM2,XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */
LONG $0xddefe1c5 \ // VPXOR XMM3,XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */
LONG $0xfad469c5 \ // VPADDQ XMM15,XMM2,XMM2 /* temp reg = reg*2 */
LONG $0xd273e9c5; BYTE $0x3f \ // VPSRLQ XMM2,XMM2,0x3f /* reg = reg>>63 */
LONG $0xef69c1c4; BYTE $0xd7 \ // VPXOR XMM2,XMM2,XMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */
LONG $0xfbd461c5 \ // VPADDQ XMM15,XMM3,XMM3 /* temp reg = reg*2 */
LONG $0xd373e1c5; BYTE $0x3f \ // VPSRLQ XMM3,XMM3,0x3f /* reg = reg>>63 */
LONG $0xef61c1c4; BYTE $0xdf // VPXOR XMM3,XMM3,XMM15 /* ORed together: v6 = v6<<(64-63) | v6>>63, v7 = v7<<(64-63) | v7>>63 */
#define DIAGONALIZE \
\ // DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
MOVOU X6, X13 \ /* t0 = row4l;\ */
MOVOU X2, X14 \ /* t1 = row2l;\ */
MOVOU X4, X6 \ /* row4l = row3l;\ */
MOVOU X5, X4 \ /* row3l = row3h;\ */
MOVOU X6, X5 \ /* row3h = row4l;\ */
LONG $0x6c1141c4; BYTE $0xfd \ // VPUNPCKLQDQ XMM15, XMM13, XMM13 /* _mm_unpacklo_epi64(t0, t0) */
LONG $0x6d41c1c4; BYTE $0xf7 \ // VPUNPCKHQDQ XMM6, XMM7, XMM15 /* row4l = _mm_unpackhi_epi64(row4h, ); \ */
LONG $0xff6c41c5 \ // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */
LONG $0x6d11c1c4; BYTE $0xff \ // VPUNPCKHQDQ XMM7, XMM13, XMM15 /* row4h = _mm_unpackhi_epi64(t0, ); \ */
LONG $0xfb6c61c5 \ // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */
LONG $0x6d69c1c4; BYTE $0xd7 \ // VPUNPCKHQDQ XMM2, XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2l, ); \ */
LONG $0x6c0941c4; BYTE $0xfe \ // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
LONG $0x6d61c1c4; BYTE $0xdf // VPUNPCKHQDQ XMM3, XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */
#define UNDIAGONALIZE \
\ // UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
MOVOU X4, X13 \ /* t0 = row3l;\ */
MOVOU X5, X4 \ /* row3l = row3h;\ */
MOVOU X13, X5 \ /* row3h = t0;\ */
MOVOU X2, X13 \ /* t0 = row2l;\ */
MOVOU X6, X14 \ /* t1 = row4l;\ */
LONG $0xfa6c69c5 \ // VPUNPCKLQDQ XMM15, XMM2, XMM2 /* _mm_unpacklo_epi64(row2l, row2l) */
LONG $0x6d61c1c4; BYTE $0xd7 \ // VPUNPCKHQDQ XMM2, XMM3, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, ); \ */
LONG $0xfb6c61c5 \ // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */
LONG $0x6d11c1c4; BYTE $0xdf \ // VPUNPCKHQDQ XMM3, XMM13, XMM15 /* row2h = _mm_unpackhi_epi64(t0, ); \ */
LONG $0xff6c41c5 \ // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */
LONG $0x6d49c1c4; BYTE $0xf7 \ // VPUNPCKHQDQ XMM6, XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, ); \ */
LONG $0x6c0941c4; BYTE $0xfe \ // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
LONG $0x6d41c1c4; BYTE $0xff // VPUNPCKHQDQ XMM7, XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, ) */
#define LOAD_SHUFFLE \
\ // Load shuffle value
MOVQ shffle+120(FP), SI \ // SI: &shuffle
MOVOU 0(SI), X12 // X12 = 03040506 07000102 0b0c0d0e 0f08090a
// func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64)
TEXT ·blockAVXLoop(SB), 7, $0
// REGISTER USE
// R8: loop counter
// DX: message pointer
// SI: temp pointer for loading
// X0 - X7: v0 - v15
// X8 - X11: m[0] - m[7]
// X12: shuffle value
// X13 - X15: temp registers
// Load digest
MOVQ in+24(FP), SI // SI: &in
MOVOU 0(SI), X0 // X0 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */
MOVOU 16(SI), X1 // X1 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */
MOVOU 32(SI), X2 // X2 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */
MOVOU 48(SI), X3 // X3 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */
// Already store digest into &out (so we can reload it later generically)
MOVQ out+144(FP), SI // SI: &out
MOVOU X0, 0(SI) // out[0]+out[1] = X0
MOVOU X1, 16(SI) // out[2]+out[3] = X1
MOVOU X2, 32(SI) // out[4]+out[5] = X2
MOVOU X3, 48(SI) // out[6]+out[7] = X3
// Initialize message pointer and loop counter
MOVQ message+0(FP), DX // DX: &p (message)
MOVQ message_len+8(FP), R8 // R8: len(message)
SHRQ $7, R8 // len(message) / 128
CMPQ R8, $0
JEQ complete
loop:
// Increment counter
MOVQ t+72(FP), SI // SI: &t
MOVQ 0(SI), R9
ADDQ $128, R9 // /* d.t[0] += BlockSize */
MOVQ R9, 0(SI)
CMPQ R9, $128 // /* if d.t[0] < BlockSize { */
JGE noincr
MOVQ 8(SI), R9
ADDQ $1, R9 // /* d.t[1]++ */
MOVQ R9, 8(SI)
noincr: // /* } */
// Load initialization vector
MOVQ iv+48(FP), SI // SI: &iv
MOVOU 0(SI), X4 // X4 = iv[0]+iv[1] /* row3l = LOAD( &blake2b_IV[0] ); */
MOVOU 16(SI), X5 // X5 = iv[2]+iv[3] /* row3h = LOAD( &blake2b_IV[2] ); */
MOVOU 32(SI), X6 // X6 = iv[4]+iv[5] /* LOAD( &blake2b_IV[4] ) */
MOVOU 48(SI), X7 // X7 = iv[6]+iv[7] /* LOAD( &blake2b_IV[6] ) */
MOVQ t+72(FP), SI // SI: &t
MOVOU 0(SI), X8 // X8 = t[0]+t[1] /* LOAD( &S->t[0] ) */
PXOR X8, X6 // X6 = X6 ^ X8 /* row4l = _mm_xor_si128( , ); */
MOVQ t+96(FP), SI // SI: &f
MOVOU 0(SI), X8 // X8 = f[0]+f[1] /* LOAD( &S->f[0] ) */
PXOR X8, X7 // X7 = X7 ^ X8 /* row4h = _mm_xor_si128( , ); */
///////////////////////////////////////////////////////////////////////////
// R O U N D 1
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+m[1]
MOVOU 16(DX), X13 // X13 = m[2]+m[3]
MOVOU 32(DX), X14 // X14 = m[4]+m[5]
MOVOU 48(DX), X15 // X15 = m[6]+m[7]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[2] */
LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[4], m[6] */
LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[1], m[3] */
LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[5], m[7] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[8],m[10] */
LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[12],m[14] */
LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[9],m[11] */
LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[13],m[15] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 2
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 112(DX), X12 // X12 = m[14]+m[15]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[14], m[4] */
LONG $0x6d0941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM14, XMM15 /* m[9], m[13] */
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 48(DX), X15 // X15 = m[6]+ m[7]
LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[10], m[8] */
LONG $0x0f0143c4; WORD $0x08dc // VPALIGNR XMM11, XMM15, XMM12, 0x8 /* m[15], m[6] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
LONG $0x0f1943c4; WORD $0x08c4 // VPALIGNR XMM8, XMM12, XMM12, 0x8 /* m[1], m[0] */
LONG $0x6d0941c4; BYTE $0xcd // VPUNPCKHQDQ XMM9, XMM14, XMM13 /* m[11], m[5] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[2] */
LONG $0x6d1141c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM13, XMM12 /* m[7], m[3] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 3
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 32(DX), X12 // X12 = m[4]+ m[5]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x0f0943c4; WORD $0x08c5 // VPALIGNR XMM8, XMM14, XMM13, 0x8 /* m[11], m[12] */
LONG $0x6d1941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM12, XMM15 /* m[5], m[15] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
LONG $0x6c0141c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM15, XMM12 /* m[8], m[0] */
LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[13] */
LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[2], ___ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6d1941c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM12, XMM12 /* ___, m[3] */
LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[10], ___ */
LONG $0x6d1141c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM13, XMM14 /* m[7], m[9] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X14 // X14 = m[4]+ m[5]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6c0141c4; BYTE $0xd5 // VPUNPCKLQDQ XMM10, XMM15, XMM13 /* m[14], m[6] */
LONG $0x0f0943c4; WORD $0x08dc // VPALIGNR XMM11, XMM14, XMM12, 0x8 /* m[1], m[4] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 4
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
LONG $0x6d1141c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM13, XMM12 /* m[7], m[3] */
LONG $0x6d0141c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM15, XMM14 /* m[13], m[11] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 112(DX), X14 // X14 = m[14]+m[15]
LONG $0x6d1141c4; BYTE $0xd4 // VPUNPCKHQDQ XMM10, XMM13, XMM12 /* m[9], m[1] */
LONG $0x6c0141c4; BYTE $0xde // VPUNPCKLQDQ XMM11, XMM15, XMM14 /* m[12], m[14] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d1141c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM13, XMM13 /* ___, m[5] */
LONG $0x6c1941c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM12, XMM8 /* m[2], ____ */
LONG $0x6d0141c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM15, XMM15 /* ___, m[15] */
LONG $0x6c1141c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM13, XMM9 /* m[4], ____ */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[6], m[10] */
LONG $0x6c1941c4; BYTE $0xdf // VPUNPCKLQDQ XMM11, XMM12, XMM15 /* m[0], m[8] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 5
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6d0941c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM14, XMM13 /* m[9], m[5] */
LONG $0x6c1941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM12, XMM15 /* m[2], m[10] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0941c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM14, XMM14 /* ___, m[7] */
LONG $0x6c1941c4; BYTE $0xd2 // VPUNPCKLQDQ XMM10, XMM12, XMM10 /* m[0], ____ */
LONG $0x6d0141c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM15, XMM15 /* ___, m[15] */
LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[4], ____ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0941c4; BYTE $0xc6 // VPUNPCKHQDQ XMM8, XMM14, XMM14 /* ___, m[11] */
LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[14], ____ */
LONG $0x6d1941c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM12, XMM12 /* ___, m[3] */
LONG $0x6c1141c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM13, XMM9 /* m[6], ____ */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x0f0943c4; WORD $0x08d4 // VPALIGNR XMM10, XMM14, XMM12, 0x8 /* m[1], m[12] */
LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[13] */
LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[8], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 6
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
LONG $0x6c1141c4; BYTE $0xc6 // VPUNPCKLQDQ XMM8, XMM13, XMM14 /* m[2], m[6] */
LONG $0x6c1941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM12, XMM15 /* m[0], m[8] */
MOVOU 80(DX), X12 // X12 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[10] */
LONG $0x6d1941c4; BYTE $0xdd // VPUNPCKHQDQ XMM11, XMM12, XMM13 /* m[11], m[3] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0941c4; BYTE $0xc6 // VPUNPCKHQDQ XMM8, XMM14, XMM14 /* ___, m[7] */
LONG $0x6c1141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM13, XMM8 /* m[4], ____ */
LONG $0x6d0141c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM15, XMM12 /* m[15], m[1] */
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x6d0941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM14, XMM13 /* m[13], m[5] */
LONG $0x6d1941c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM12, XMM12 /* ___, m[9] */
LONG $0x6c0141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM15, XMM11 /* m[14], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 7
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d1941c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM12, XMM12 /* ___, m[1] */
LONG $0x6c0941c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM14, XMM8 /* m[12], ____ */
LONG $0x6c0141c4; BYTE $0xcd // VPUNPCKLQDQ XMM9, XMM15, XMM13 /* m[14], m[4] */
MOVOU 80(DX), X12 // X12 = m[10]+m[11]
LONG $0x6d1141c4; BYTE $0xd7 // VPUNPCKHQDQ XMM10, XMM13, XMM15 /* m[5], m[15] */
LONG $0x0f1943c4; WORD $0x08de // VPALIGNR XMM11, XMM12, XMM14, 0x8 /* m[13], m[10] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[6] */
LONG $0x0f0943c4; WORD $0x08ce // VPALIGNR XMM9, XMM14, XMM14, 0x8 /* m[9], m[8] */
MOVOU 16(DX), X14 // X14 = m[2]+ m[3]
LONG $0x6d1141c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM13, XMM14 /* m[7], m[3] */
LONG $0x6d0141c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM15, XMM15 /* ___, m[11] */
LONG $0x6c0941c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM14, XMM11 /* m[2], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 8
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0941c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM14, XMM13 /* m[13], m[7] */
LONG $0x6d1941c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM12, XMM12 /* ___, m[3] */
LONG $0x6c0941c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM14, XMM9 /* m[12], ____ */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
LONG $0x0f0143c4; WORD $0x08d6 // VPALIGNR XMM10, XMM15, XMM14, 0x8 /* m[11], m[14] */
LONG $0x6d1941c4; BYTE $0xdd // VPUNPCKHQDQ XMM11, XMM12, XMM13 /* m[1], m[9] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d1141c4; BYTE $0xc7 // VPUNPCKHQDQ XMM8, XMM13, XMM15 /* m[5], m[15] */
LONG $0x6c0941c4; BYTE $0xcc // VPUNPCKLQDQ XMM9, XMM14, XMM12 /* m[8], m[2] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6c1941c4; BYTE $0xd5 // VPUNPCKLQDQ XMM10, XMM12, XMM13 /* m[0], m[4] */
LONG $0x6c0941c4; BYTE $0xdf // VPUNPCKLQDQ XMM11, XMM14, XMM15 /* m[6], m[10] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 9
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6c1141c4; BYTE $0xc7 // VPUNPCKLQDQ XMM8, XMM13, XMM15 /* m[6], m[14] */
LONG $0x0f1943c4; WORD $0x08ce // VPALIGNR XMM9, XMM12, XMM14, 0x8 /* m[11], m[0] */
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
LONG $0x6d0141c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM15, XMM14 /* m[15], m[9] */
LONG $0x0f0943c4; WORD $0x08dd // VPALIGNR XMM11, XMM14, XMM13, 0x8 /* m[3], m[8] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
LONG $0x6d0141c4; BYTE $0xc7 // VPUNPCKHQDQ XMM8, XMM15, XMM15 /* ___, m[13] */
LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[12], ____ */
LONG $0x0f0943c4; WORD $0x08cc // VPALIGNR XMM9, XMM14, XMM12, 0x8 /* m[1], m[10] */
MOVOU 32(DX), X12 // X12 = m[4]+ m[5]
MOVOU 48(DX), X15 // X15 = m[6]+ m[7]
LONG $0x6d0141c4; BYTE $0xd7 // VPUNPCKHQDQ XMM10, XMM15, XMM15 /* ___, m[7] */
LONG $0x6c1141c4; BYTE $0xd2 // VPUNPCKLQDQ XMM10, XMM13, XMM10 /* m[2], ____ */
LONG $0x6d1941c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM12, XMM12 /* ___, m[5] */
LONG $0x6c1941c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM12, XMM11 /* m[4], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 0
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6c0141c4; BYTE $0xc6 // VPUNPCKLQDQ XMM8, XMM15, XMM14 /* m[10], m[8] */
LONG $0x6d1141c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM13, XMM12 /* m[7], m[1] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X14 // X14 = m[4]+ m[5]
LONG $0x6c1941c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM12, XMM14 /* m[2], m[4] */
LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[5] */
LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[6], ____ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0141c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM15, XMM13 /* m[15], m[9] */
LONG $0x6d1941c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM12, XMM14 /* m[3], m[13] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
LONG $0x0f0143c4; WORD $0x08d5 // VPALIGNR XMM10, XMM15, XMM13, 0x8 /* m[11], m[14] */
LONG $0x6c0941c4; BYTE $0xdc // VPUNPCKLQDQ XMM11, XMM14, XMM12 /* m[12], m[0] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 1
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+m[1]
MOVOU 16(DX), X13 // X13 = m[2]+m[3]
MOVOU 32(DX), X14 // X14 = m[4]+m[5]
MOVOU 48(DX), X15 // X15 = m[6]+m[7]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[2] */
LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[4], m[6] */
LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[1], m[3] */
LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[5], m[7] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[8],m[10] */
LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[12],m[14] */
LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[9],m[11] */
LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[13],m[15] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 2
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 112(DX), X12 // X12 = m[14]+m[15]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[14], m[4] */
LONG $0x6d0941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM14, XMM15 /* m[9], m[13] */
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 48(DX), X15 // X15 = m[6]+ m[7]
LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[10], m[8] */
LONG $0x0f0143c4; WORD $0x08dc // VPALIGNR XMM11, XMM15, XMM12, 0x8 /* m[15], m[6] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
LONG $0x0f1943c4; WORD $0x08c4 // VPALIGNR XMM8, XMM12, XMM12, 0x8 /* m[1], m[0] */
LONG $0x6d0941c4; BYTE $0xcd // VPUNPCKHQDQ XMM9, XMM14, XMM13 /* m[11], m[5] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[2] */
LONG $0x6d1141c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM13, XMM12 /* m[7], m[3] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
// Reload digest (most current value store in &out)
MOVQ out+144(FP), SI // SI: &in
MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */
MOVOU 16(SI), X13 // X13 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */
MOVOU 32(SI), X14 // X14 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */
MOVOU 48(SI), X15 // X15 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */
// Final computations and prepare for storing
PXOR X4, X0 // X0 = X0 ^ X4 /* row1l = _mm_xor_si128( row3l, row1l ); */
PXOR X5, X1 // X1 = X1 ^ X5 /* row1h = _mm_xor_si128( row3h, row1h ); */
PXOR X12, X0 // X0 = X0 ^ X12 /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */
PXOR X13, X1 // X1 = X1 ^ X13 /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */
PXOR X6, X2 // X2 = X2 ^ X6 /* row2l = _mm_xor_si128( row4l, row2l ); */
PXOR X7, X3 // X3 = X3 ^ X7 /* row2h = _mm_xor_si128( row4h, row2h ); */
PXOR X14, X2 // X2 = X2 ^ X14 /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */
PXOR X15, X3 // X3 = X3 ^ X15 /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */
// Store digest into &out
MOVQ out+144(FP), SI // SI: &out
MOVOU X0, 0(SI) // out[0]+out[1] = X0
MOVOU X1, 16(SI) // out[2]+out[3] = X1
MOVOU X2, 32(SI) // out[4]+out[5] = X2
MOVOU X3, 48(SI) // out[6]+out[7] = X3
// Increment message pointer and check if there's more to do
ADDQ $128, DX // message += 128
SUBQ $1, R8
JNZ loop
complete:
RET

View file

@ -1,41 +0,0 @@
//+build !noasm
//+build !appengine
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
//go:noescape
func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64)
func compressSSE(d *digest, p []uint8) {
var (
in [8]uint64
out [8]uint64
shffle [2]uint64
)
// vector for PSHUFB instruction
shffle[0] = 0x0201000706050403
shffle[1] = 0x0a09080f0e0d0c0b
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
blockSSELoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
}

View file

@ -1,770 +0,0 @@
//+build !noasm !appengine
//
// Minio Cloud Storage, (C) 2016 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
// Based on SSE implementation from https://github.com/BLAKE2/BLAKE2/blob/master/sse/blake2b.c
//
// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent
//
// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as:
// #define ROUND(r) \
// LOAD_MSG_ ##r ##_1(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_2(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
// LOAD_MSG_ ##r ##_3(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_4(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
//
// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go
//
// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly)
//
// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and
// rounds 2 & 12 are identical)
//
#define G1 \
\ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
LONG $0xd40f4166; BYTE $0xc0 \ // PADDQ XMM0,XMM8 /* v0 += m[0], v1 += m[2] */
LONG $0xd40f4166; BYTE $0xc9 \ // PADDQ XMM1,XMM9 /* v2 += m[4], v3 += m[6] */
LONG $0xc2d40f66 \ // PADDQ XMM0,XMM2 /* v0 += v4, v1 += v5 */
LONG $0xcbd40f66 \ // PADDQ XMM1,XMM3 /* v2 += v6, v3 += v7 */
LONG $0xf0ef0f66 \ // PXOR XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */
LONG $0xf9ef0f66 \ // PXOR XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */
LONG $0xf6700f66; BYTE $0xb1 \ // PSHUFD XMM6,XMM6,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = v13<<(64-32) | v13>>32 */
LONG $0xff700f66; BYTE $0xb1 \ // PSHUFD XMM7,XMM7,0xb1 /* v14 = v14<<(64-32) | v14>>32, v15 = v15<<(64-32) | v15>>32 */
LONG $0xe6d40f66 \ // PADDQ XMM4,XMM6 /* v8 += v12, v9 += v13 */
LONG $0xefd40f66 \ // PADDQ XMM5,XMM7 /* v10 += v14, v11 += v15 */
LONG $0xd4ef0f66 \ // PXOR XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */
LONG $0xddef0f66 \ // PXOR XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */
LONG $0x380f4166; WORD $0xd400 \ // PSHUFB XMM2,XMM12 /* v4 = v4<<(64-24) | v4>>24, v5 = v5<<(64-24) | v5>>24 */
LONG $0x380f4166; WORD $0xdc00 // PSHUFB XMM3,XMM12 /* v6 = v6<<(64-24) | v6>>24, v7 = v7<<(64-24) | v7>>24 */
#define G2 \
\ // G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
LONG $0xd40f4166; BYTE $0xc2 \ // PADDQ XMM0,XMM10 /* v0 += m[1], v1 += m[3] */
LONG $0xd40f4166; BYTE $0xcb \ // PADDQ XMM1,XMM11 /* v2 += m[5], v3 += m[7] */
LONG $0xc2d40f66 \ // PADDQ XMM0,XMM2 /* v0 += v4, v1 += v5 */
LONG $0xcbd40f66 \ // PADDQ XMM1,XMM3 /* v2 += v6, v3 += v7 */
LONG $0xf0ef0f66 \ // PXOR XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */
LONG $0xf9ef0f66 \ // PXOR XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */
LONG $0xf6700ff2; BYTE $0x39 \ // PSHUFLW XMM6,XMM6,0x39 /* combined with next ... */
LONG $0xf6700ff3; BYTE $0x39 \ // PSHUFHW XMM6,XMM6,0x39 /* v12 = v12<<(64-16) | v12>>16, v13 = v13<<(64-16) | v13>>16 */
LONG $0xff700ff2; BYTE $0x39 \ // PSHUFLW XMM7,XMM7,0x39 /* combined with next ... */
LONG $0xff700ff3; BYTE $0x39 \ // PSHUFHW XMM7,XMM7,0x39 /* v14 = v14<<(64-16) | v14>>16, v15 = v15<<(64-16) | v15>>16 */
LONG $0xe6d40f66 \ // PADDQ XMM4,XMM6 /* v8 += v12, v9 += v13 */
LONG $0xefd40f66 \ // PADDQ XMM5,XMM7 /* v10 += v14, v11 += v15 */
LONG $0xd4ef0f66 \ // PXOR XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */
LONG $0xddef0f66 \ // PXOR XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */
MOVOU X2, X15 \
LONG $0xd40f4466; BYTE $0xfa \ // PADDQ XMM15,XMM2 /* temp reg = reg*2 */
LONG $0xd2730f66; BYTE $0x3f \ // PSRLQ XMM2,0x3f /* reg = reg>>63 */
LONG $0xef0f4166; BYTE $0xd7 \ // PXOR XMM2,XMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */
MOVOU X3, X15 \
LONG $0xd40f4466; BYTE $0xfb \ // PADDQ XMM15,XMM3 /* temp reg = reg*2 */
LONG $0xd3730f66; BYTE $0x3f \ // PSRLQ XMM3,0x3f /* reg = reg>>63 */
LONG $0xef0f4166; BYTE $0xdf // PXOR XMM3,XMM15 /* ORed together: v6 = v6<<(64-63) | v6>>63, v7 = v7<<(64-63) | v7>>63 */
#define DIAGONALIZE \
\ // DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
MOVOU X6, X13 \ /* t0 = row4l;\ */
MOVOU X2, X14 \ /* t1 = row2l;\ */
MOVOU X4, X6 \ /* row4l = row3l;\ */
MOVOU X5, X4 \ /* row3l = row3h;\ */
MOVOU X6, X5 \ /* row3h = row4l;\ */
LONG $0x6c0f4566; BYTE $0xfd \ // PUNPCKLQDQ XMM15, XMM13 /* _mm_unpacklo_epi64(t0, t0) */
MOVOU X7, X6 \
LONG $0x6d0f4166; BYTE $0xf7 \ // PUNPCKHQDQ XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4h, ); \ */
LONG $0x6c0f4466; BYTE $0xff \ // PUNPCKLQDQ XMM15, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */
MOVOU X13, X7 \
LONG $0x6d0f4166; BYTE $0xff \ // PUNPCKHQDQ XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(t0, ); \ */
LONG $0x6c0f4466; BYTE $0xfb \ // PUNPCKLQDQ XMM15, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */
LONG $0x6d0f4166; BYTE $0xd7 \ // PUNPCKHQDQ XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2l, ); \ */
LONG $0x6c0f4566; BYTE $0xfe \ // PUNPCKLQDQ XMM15, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
LONG $0x6d0f4166; BYTE $0xdf // PUNPCKHQDQ XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */
#define UNDIAGONALIZE \
\ // UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
MOVOU X4, X13 \ /* t0 = row3l;\ */
MOVOU X5, X4 \ /* row3l = row3h;\ */
MOVOU X13, X5 \ /* row3h = t0;\ */
MOVOU X2, X13 \ /* t0 = row2l;\ */
MOVOU X6, X14 \ /* t1 = row4l;\ */
LONG $0x6c0f4466; BYTE $0xfa \ // PUNPCKLQDQ XMM15, XMM2 /* _mm_unpacklo_epi64(row2l, row2l) */
MOVOU X3, X2 \
LONG $0x6d0f4166; BYTE $0xd7 \ // PUNPCKHQDQ XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, ); \ */
LONG $0x6c0f4466; BYTE $0xfb \ // PUNPCKLQDQ XMM15, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */
MOVOU X13, X3 \
LONG $0x6d0f4166; BYTE $0xdf \ // PUNPCKHQDQ XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(t0, ); \ */
LONG $0x6c0f4466; BYTE $0xff \ // PUNPCKLQDQ XMM15, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */
LONG $0x6d0f4166; BYTE $0xf7 \ // PUNPCKHQDQ XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, ); \ */
LONG $0x6c0f4566; BYTE $0xfe \ // PUNPCKLQDQ XMM15, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
LONG $0x6d0f4166; BYTE $0xff // PUNPCKHQDQ XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, ) */
#define LOAD_SHUFFLE \
\ // Load shuffle value
MOVQ shffle+120(FP), SI \ // SI: &shuffle
MOVOU 0(SI), X12 // X12 = 03040506 07000102 0b0c0d0e 0f08090a
// func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64)
TEXT ·blockSSELoop(SB), 7, $0
// REGISTER USE
// R8: loop counter
// DX: message pointer
// SI: temp pointer for loading
// X0 - X7: v0 - v15
// X8 - X11: m[0] - m[7]
// X12: shuffle value
// X13 - X15: temp registers
// Load digest
MOVQ in+24(FP), SI // SI: &in
MOVOU 0(SI), X0 // X0 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */
MOVOU 16(SI), X1 // X1 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */
MOVOU 32(SI), X2 // X2 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */
MOVOU 48(SI), X3 // X3 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */
// Already store digest into &out (so we can reload it later generically)
MOVQ out+144(FP), SI // SI: &out
MOVOU X0, 0(SI) // out[0]+out[1] = X0
MOVOU X1, 16(SI) // out[2]+out[3] = X1
MOVOU X2, 32(SI) // out[4]+out[5] = X2
MOVOU X3, 48(SI) // out[6]+out[7] = X3
// Initialize message pointer and loop counter
MOVQ message+0(FP), DX // DX: &p (message)
MOVQ message_len+8(FP), R8 // R8: len(message)
SHRQ $7, R8 // len(message) / 128
CMPQ R8, $0
JEQ complete
loop:
// Increment counter
MOVQ t+72(FP), SI // SI: &t
MOVQ 0(SI), R9
ADDQ $128, R9 // /* d.t[0] += BlockSize */
MOVQ R9, 0(SI)
CMPQ R9, $128 // /* if d.t[0] < BlockSize { */
JGE noincr
MOVQ 8(SI), R9
ADDQ $1, R9 // /* d.t[1]++ */
MOVQ R9, 8(SI)
noincr: // /* } */
// Load initialization vector
MOVQ iv+48(FP), SI // SI: &iv
MOVOU 0(SI), X4 // X4 = iv[0]+iv[1] /* row3l = LOAD( &blake2b_IV[0] ); */
MOVOU 16(SI), X5 // X5 = iv[2]+iv[3] /* row3h = LOAD( &blake2b_IV[2] ); */
MOVOU 32(SI), X6 // X6 = iv[4]+iv[5] /* LOAD( &blake2b_IV[4] ) */
MOVOU 48(SI), X7 // X7 = iv[6]+iv[7] /* LOAD( &blake2b_IV[6] ) */
MOVQ t+72(FP), SI // SI: &t
MOVOU 0(SI), X8 // X8 = t[0]+t[1] /* LOAD( &S->t[0] ) */
PXOR X8, X6 // X6 = X6 ^ X8 /* row4l = _mm_xor_si128( , ); */
MOVQ t+96(FP), SI // SI: &f
MOVOU 0(SI), X8 // X8 = f[0]+f[1] /* LOAD( &S->f[0] ) */
PXOR X8, X7 // X7 = X7 ^ X8 /* row4h = _mm_xor_si128( , ); */
///////////////////////////////////////////////////////////////////////////
// R O U N D 1
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+m[1]
MOVOU 16(DX), X13 // X13 = m[2]+m[3]
MOVOU 32(DX), X14 // X14 = m[4]+m[5]
MOVOU 48(DX), X15 // X15 = m[6]+m[7]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[2] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[4], m[6] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[1], m[3] */
MOVOU X14, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[5], m[7] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[8],m[10] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[12],m[14] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[9],m[11] */
MOVOU X14, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[13],m[15] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 2
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 112(DX), X12 // X12 = m[14]+m[15]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[14], m[4] */
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[9], m[13] */
MOVOU 80(DX), X10 // X10 = m[10]+m[11]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[10], m[8] */
LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[15], m[6] */; ; ; ; ;
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU X12, X8
LONG $0x3a0f4566; WORD $0xc40f; BYTE $0x08 // PALIGNR XMM8, XMM12, 0x8 /* m[1], m[0] */
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* m[11], m[5] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
MOVOU 96(DX), X10 // X10 = m[12]+m[13]
LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[2] */
LONG $0x6d0f4566; BYTE $0xdc // PUNPCKHQDQ XMM11, XMM12 /* m[7], m[3] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 3
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 32(DX), X12 // X12 = m[4]+ m[5]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X14, X8
LONG $0x3a0f4566; WORD $0xc50f; BYTE $0x08 // PALIGNR XMM8, XMM13, 0x8 /* m[11], m[12] */
MOVOU X12, X9
LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[5], m[15] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 64(DX), X10 // X10 = m[8]+ m[9]
LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[8], m[0] */
LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[13] */
MOVOU X13, X11
LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[2], ___ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
MOVOU X12, X9
LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* ___, m[3] */
MOVOU X15, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[10], ___ */
MOVOU X13, X9
LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[7], m[9] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X11 // X11 = m[4]+ m[5]
MOVOU 112(DX), X10 // X10 = m[14]+m[15]
LONG $0x6c0f4566; BYTE $0xd5 // PUNPCKLQDQ XMM10, XMM13 /* m[14], m[6] */
LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[1], m[4] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 4
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
MOVOU X13, X8
LONG $0x6d0f4566; BYTE $0xc4 // PUNPCKHQDQ XMM8, XMM12 /* m[7], m[3] */
MOVOU X15, X9
LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[13], m[11] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X10 // X10 = m[8]+ m[9]
MOVOU 112(DX), X14 // X14 = m[14]+m[15]
LONG $0x6d0f4566; BYTE $0xd4 // PUNPCKHQDQ XMM10, XMM12 /* m[9], m[1] */
MOVOU X15, X11
LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[12], m[14] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X13, X9
LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* ___, m[5] */
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[2], ____ */
MOVOU X15, X10
LONG $0x6d0f4566; BYTE $0xd7 // PUNPCKHQDQ XMM10, XMM15 /* ___, m[15] */
MOVOU X13, X9
LONG $0x6c0f4566; BYTE $0xca // PUNPCKLQDQ XMM9, XMM10 /* m[4], ____ */
MOVOU 0(DX), X11 // X11 = m[0]+ m[1]
MOVOU 48(DX), X10 // X10 = m[6]+ m[7]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[6], m[10] */
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[0], m[8] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 5
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
MOVOU X14, X8
LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[9], m[5] */
MOVOU X12, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[2], m[10] */
MOVOU 0(DX), X10 // X10 = m[0]+ m[1]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[7] */
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[0], ____ */
LONG $0x6d0f4566; BYTE $0xff // PUNPCKHQDQ XMM15, XMM15 /* ___, m[15] */
MOVOU X13, X11
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[4], ____ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[11] */
MOVOU X15, X8
LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[14], ____ */
LONG $0x6d0f4566; BYTE $0xe4 // PUNPCKHQDQ XMM12, XMM12 /* ___, m[3] */
MOVOU X13, X9
LONG $0x6c0f4566; BYTE $0xcc // PUNPCKLQDQ XMM9, XMM12 /* m[6], ____ */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X11 // X11 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU X14, X10
LONG $0x3a0f4566; WORD $0xd40f; BYTE $0x08 // PALIGNR XMM10, XMM12, 0x8 /* m[1], m[12] */
LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[13] */
LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[8], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 6
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
MOVOU X13, X8
LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[2], m[6] */
MOVOU X12, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[0], m[8] */
MOVOU 80(DX), X12 // X12 = m[10]+m[11]
MOVOU 96(DX), X10 // X10 = m[12]+m[13]
LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[10] */
MOVOU X12, X11
LONG $0x6d0f4566; BYTE $0xdd // PUNPCKHQDQ XMM11, XMM13 /* m[11], m[3] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* ___, m[7] */
MOVOU X13, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[4], ____ */
MOVOU X15, X9
LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* m[15], m[1] */
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 96(DX), X10 // X10 = m[12]+m[13]
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[13], m[5] */
LONG $0x6d0f4566; BYTE $0xe4 // PUNPCKHQDQ XMM12, XMM12 /* ___, m[9] */
MOVOU X15, X11
LONG $0x6c0f4566; BYTE $0xdc // PUNPCKLQDQ XMM11, XMM12 /* m[14], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 7
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X12, X9
LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* ___, m[1] */
MOVOU X14, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[12], ____ */
MOVOU X15, X9
LONG $0x6c0f4566; BYTE $0xcd // PUNPCKLQDQ XMM9, XMM13 /* m[14], m[4] */
MOVOU 80(DX), X11 // X11 = m[10]+m[11]
MOVOU X13, X10
LONG $0x6d0f4566; BYTE $0xd7 // PUNPCKHQDQ XMM10, XMM15 /* m[5], m[15] */
LONG $0x3a0f4566; WORD $0xde0f; BYTE $0x08 // PALIGNR XMM11, XMM14, 0x8 /* m[13], m[10] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[6] */
MOVOU X14, X9
LONG $0x3a0f4566; WORD $0xce0f; BYTE $0x08 // PALIGNR XMM9, XMM14, 0x8 /* m[9], m[8] */
MOVOU 16(DX), X11 // X14 = m[2]+ m[3]
MOVOU X13, X10
LONG $0x6d0f4566; BYTE $0xd3 // PUNPCKHQDQ XMM10, XMM11 /* m[7], m[3] */
LONG $0x6d0f4566; BYTE $0xff // PUNPCKHQDQ XMM15, XMM15 /* ___, m[11] */
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[2], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 8
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X14, X8
LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[13], m[7] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd4 // PUNPCKHQDQ XMM10, XMM12 /* ___, m[3] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xca // PUNPCKLQDQ XMM9, XMM10 /* m[12], ____ */
MOVOU 0(DX), X11 // X11 = m[0]+ m[1]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU X15, X10
LONG $0x3a0f4566; WORD $0xd60f; BYTE $0x08 // PALIGNR XMM10, XMM14, 0x8 /* m[11], m[14] */
LONG $0x6d0f4566; BYTE $0xdd // PUNPCKHQDQ XMM11, XMM13 /* m[1], m[9] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X13, X8
LONG $0x6d0f4566; BYTE $0xc7 // PUNPCKHQDQ XMM8, XMM15 /* m[5], m[15] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcc // PUNPCKLQDQ XMM9, XMM12 /* m[8], m[2] */
MOVOU 0(DX), X10 // X10 = m[0]+ m[1]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6c0f4566; BYTE $0xd5 // PUNPCKLQDQ XMM10, XMM13 /* m[0], m[4] */
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[6], m[10] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 9
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X13, X8
LONG $0x6c0f4566; BYTE $0xc7 // PUNPCKLQDQ XMM8, XMM15 /* m[6], m[14] */
MOVOU X12, X9
LONG $0x3a0f4566; WORD $0xce0f; BYTE $0x08 // PALIGNR XMM9, XMM14, 0x8 /* m[11], m[0] */
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 64(DX), X11 // X11 = m[8]+ m[9]
MOVOU X15, X10
LONG $0x6d0f4566; BYTE $0xd3 // PUNPCKHQDQ XMM10, XMM11 /* m[15], m[9] */
LONG $0x3a0f4566; WORD $0xdd0f; BYTE $0x08 // PALIGNR XMM11, XMM13, 0x8 /* m[3], m[8] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
MOVOU X15, X9
LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* ___, m[13] */
MOVOU X15, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[12], ____ */
MOVOU X14, X9
LONG $0x3a0f4566; WORD $0xcc0f; BYTE $0x08 // PALIGNR XMM9, XMM12, 0x8 /* m[1], m[10] */
MOVOU 32(DX), X12 // X12 = m[4]+ m[5]
MOVOU 48(DX), X15 // X15 = m[6]+ m[7]
MOVOU X15, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* ___, m[7] */
MOVOU X13, X10
LONG $0x6c0f4566; BYTE $0xd3 // PUNPCKLQDQ XMM10, XMM11 /* m[2], ____ */
MOVOU X12, X15
LONG $0x6d0f4566; BYTE $0xfc // PUNPCKHQDQ XMM15, XMM12 /* ___, m[5] */
MOVOU X12, X11
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[4], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 0
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
MOVOU X15, X8
LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[10], m[8] */
MOVOU X13, X9
LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* m[7], m[1] */
MOVOU 16(DX), X10 // X10 = m[2]+ m[3]
MOVOU 32(DX), X14 // X14 = m[4]+ m[5]
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[2], m[4] */
MOVOU X14, X15
LONG $0x6d0f4566; BYTE $0xfe // PUNPCKHQDQ XMM15, XMM14 /* ___, m[5] */
MOVOU X13, X11
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[6], ____ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X15, X8
LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[15], m[9] */
MOVOU X12, X9
LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[3], m[13] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU X15, X10
LONG $0x3a0f4566; WORD $0xd50f; BYTE $0x08 // PALIGNR XMM10, XMM13, 0x8 /* m[11], m[14] */
MOVOU X14, X11
LONG $0x6c0f4566; BYTE $0xdc // PUNPCKLQDQ XMM11, XMM12 /* m[12], m[0] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 1
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+m[1]
MOVOU 16(DX), X13 // X13 = m[2]+m[3]
MOVOU 32(DX), X14 // X14 = m[4]+m[5]
MOVOU 48(DX), X15 // X15 = m[6]+m[7]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[2] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[4], m[6] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[1], m[3] */
MOVOU X14, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[5], m[7] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[8],m[10] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[12],m[14] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[9],m[11] */
MOVOU X14, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[13],m[15] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 2
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 112(DX), X12 // X12 = m[14]+m[15]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[14], m[4] */
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[9], m[13] */
MOVOU 80(DX), X10 // X10 = m[10]+m[11]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[10], m[8] */
LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[15], m[6] */; ; ; ; ;
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU X12, X8
LONG $0x3a0f4566; WORD $0xc40f; BYTE $0x08 // PALIGNR XMM8, XMM12, 0x8 /* m[1], m[0] */
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* m[11], m[5] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
MOVOU 96(DX), X10 // X10 = m[12]+m[13]
LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[2] */
LONG $0x6d0f4566; BYTE $0xdc // PUNPCKHQDQ XMM11, XMM12 /* m[7], m[3] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
// Reload digest (most current value store in &out)
MOVQ out+144(FP), SI // SI: &in
MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */
MOVOU 16(SI), X13 // X13 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */
MOVOU 32(SI), X14 // X14 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */
MOVOU 48(SI), X15 // X15 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */
// Final computations and prepare for storing
PXOR X4, X0 // X0 = X0 ^ X4 /* row1l = _mm_xor_si128( row3l, row1l ); */
PXOR X5, X1 // X1 = X1 ^ X5 /* row1h = _mm_xor_si128( row3h, row1h ); */
PXOR X12, X0 // X0 = X0 ^ X12 /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */
PXOR X13, X1 // X1 = X1 ^ X13 /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */
PXOR X6, X2 // X2 = X2 ^ X6 /* row2l = _mm_xor_si128( row4l, row2l ); */
PXOR X7, X3 // X3 = X3 ^ X7 /* row2h = _mm_xor_si128( row4h, row2h ); */
PXOR X14, X2 // X2 = X2 ^ X14 /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */
PXOR X15, X3 // X3 = X3 ^ X15 /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */
// Store digest into &out
MOVQ out+144(FP), SI // SI: &out
MOVOU X0, 0(SI) // out[0]+out[1] = X0
MOVOU X1, 16(SI) // out[2]+out[3] = X1
MOVOU X2, 32(SI) // out[4]+out[5] = X2
MOVOU X3, 48(SI) // out[6]+out[7] = X3
// Increment message pointer and check if there's more to do
ADDQ $128, DX // message += 128
SUBQ $1, R8
JNZ loop
complete:
RET

View file

@ -1,30 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
func compress(d *digest, p []uint8) {
// Verifies if AVX2 or AVX is available, use optimized code path.
if avx2 {
compressAVX2(d, p)
} else if avx {
compressAVX(d, p)
} else if ssse3 {
compressSSE(d, p)
} else {
compressGeneric(d, p)
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,23 +0,0 @@
//+build !amd64 noasm appengine
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
func compress(d *digest, p []uint8) {
compressGeneric(d, p)
}

View file

@ -1,60 +0,0 @@
// +build 386,!gccgo amd64,!gccgo
// Copyright 2016 Frank Wessels <fwessels@xs4all.nl>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package blake2b
func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
func xgetbv(index uint32) (eax, edx uint32)
// True when SIMD instructions are available.
var avx2 = haveAVX2()
var avx = haveAVX()
var ssse3 = haveSSSE3()
// haveAVX returns true when there is AVX support
func haveAVX() bool {
_, _, c, _ := cpuid(1)
// Check XGETBV, OXSAVE and AVX bits
if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
// Check for OS support
eax, _ := xgetbv(0)
return (eax & 0x6) == 0x6
}
return false
}
// haveAVX2 returns true when there is AVX2 support
func haveAVX2() bool {
mfi, _, _, _ := cpuid(0)
// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
if mfi >= 7 && haveAVX() {
_, ebx, _, _ := cpuidex(7, 0)
return (ebx & 0x00000020) != 0
}
return false
}
// haveSSSE3 returns true when there is SSSE3 support
func haveSSSE3() bool {
_, _, c, _ := cpuid(1)
return (c & 0x00000200) != 0
}

View file

@ -1,33 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo
// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuid(SB), 7, $0
XORL CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET
// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·xgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET

View file

@ -1,34 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build amd64,!gccgo
// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuid(SB), 7, $0
XORQ CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·xgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET

194
vendor/golang.org/x/crypto/blake2b/blake2b.go generated vendored Normal file
View file

@ -0,0 +1,194 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package blake2b implements the BLAKE2b hash algorithm as
// defined in RFC 7693.
package blake2b
import (
"encoding/binary"
"errors"
"hash"
)
const (
// The blocksize of BLAKE2b in bytes.
BlockSize = 128
// The hash size of BLAKE2b-512 in bytes.
Size = 64
// The hash size of BLAKE2b-384 in bytes.
Size384 = 48
// The hash size of BLAKE2b-256 in bytes.
Size256 = 32
)
var (
useAVX2 bool
useAVX bool
useSSE4 bool
)
var errKeySize = errors.New("blake2b: invalid key size")
var iv = [8]uint64{
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
}
// Sum512 returns the BLAKE2b-512 checksum of the data.
func Sum512(data []byte) [Size]byte {
var sum [Size]byte
checkSum(&sum, Size, data)
return sum
}
// Sum384 returns the BLAKE2b-384 checksum of the data.
func Sum384(data []byte) [Size384]byte {
var sum [Size]byte
var sum384 [Size384]byte
checkSum(&sum, Size384, data)
copy(sum384[:], sum[:Size384])
return sum384
}
// Sum256 returns the BLAKE2b-256 checksum of the data.
func Sum256(data []byte) [Size256]byte {
var sum [Size]byte
var sum256 [Size256]byte
checkSum(&sum, Size256, data)
copy(sum256[:], sum[:Size256])
return sum256
}
// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) }
// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) }
// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) }
func newDigest(hashSize int, key []byte) (*digest, error) {
if len(key) > Size {
return nil, errKeySize
}
d := &digest{
size: hashSize,
keyLen: len(key),
}
copy(d.key[:], key)
d.Reset()
return d, nil
}
func checkSum(sum *[Size]byte, hashSize int, data []byte) {
h := iv
h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24)
var c [2]uint64
if length := len(data); length > BlockSize {
n := length &^ (BlockSize - 1)
if length == n {
n -= BlockSize
}
hashBlocks(&h, &c, 0, data[:n])
data = data[n:]
}
var block [BlockSize]byte
offset := copy(block[:], data)
remaining := uint64(BlockSize - offset)
if c[0] < remaining {
c[1]--
}
c[0] -= remaining
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
for i, v := range h[:(hashSize+7)/8] {
binary.LittleEndian.PutUint64(sum[8*i:], v)
}
}
type digest struct {
h [8]uint64
c [2]uint64
size int
block [BlockSize]byte
offset int
key [BlockSize]byte
keyLen int
}
func (d *digest) BlockSize() int { return BlockSize }
func (d *digest) Size() int { return d.size }
func (d *digest) Reset() {
d.h = iv
d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24)
d.offset, d.c[0], d.c[1] = 0, 0, 0
if d.keyLen > 0 {
d.block = d.key
d.offset = BlockSize
}
}
func (d *digest) Write(p []byte) (n int, err error) {
n = len(p)
if d.offset > 0 {
remaining := BlockSize - d.offset
if n <= remaining {
d.offset += copy(d.block[d.offset:], p)
return
}
copy(d.block[d.offset:], p[:remaining])
hashBlocks(&d.h, &d.c, 0, d.block[:])
d.offset = 0
p = p[remaining:]
}
if length := len(p); length > BlockSize {
nn := length &^ (BlockSize - 1)
if length == nn {
nn -= BlockSize
}
hashBlocks(&d.h, &d.c, 0, p[:nn])
p = p[nn:]
}
if len(p) > 0 {
d.offset += copy(d.block[:], p)
}
return
}
func (d *digest) Sum(b []byte) []byte {
var block [BlockSize]byte
copy(block[:], d.block[:d.offset])
remaining := uint64(BlockSize - d.offset)
c := d.c
if c[0] < remaining {
c[1]--
}
c[0] -= remaining
h := d.h
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
var sum [Size]byte
for i, v := range h[:(d.size+7)/8] {
binary.LittleEndian.PutUint64(sum[8*i:], v)
}
return append(b, sum[:d.size]...)
}

View file

@ -0,0 +1,43 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine
package blake2b
func init() {
useAVX2 = supportsAVX2()
useAVX = supportsAVX()
useSSE4 = supportsSSE4()
}
//go:noescape
func supportsSSE4() bool
//go:noescape
func supportsAVX() bool
//go:noescape
func supportsAVX2() bool
//go:noescape
func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//go:noescape
func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//go:noescape
func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
if useAVX2 {
hashBlocksAVX2(h, c, flag, blocks)
} else if useAVX {
hashBlocksAVX(h, c, flag, blocks)
} else if useSSE4 {
hashBlocksSSE4(h, c, flag, blocks)
} else {
hashBlocksGeneric(h, c, flag, blocks)
}
}

762
vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s generated vendored Normal file
View file

@ -0,0 +1,762 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine
#include "textflag.h"
DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b
DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b
DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179
GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403
DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302
DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32
DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16
#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39
#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93
#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e
#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93
#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39
#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \
VPADDQ m0, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFD $-79, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPSHUFB c40, Y1, Y1; \
VPADDQ m1, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFB c48, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPADDQ Y1, Y1, t; \
VPSRLQ $63, Y1, Y1; \
VPXOR t, Y1, Y1; \
VPERMQ_0x39_Y1_Y1; \
VPERMQ_0x4E_Y2_Y2; \
VPERMQ_0x93_Y3_Y3; \
VPADDQ m2, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFD $-79, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPSHUFB c40, Y1, Y1; \
VPADDQ m3, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFB c48, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPADDQ Y1, Y1, t; \
VPSRLQ $63, Y1, Y1; \
VPXOR t, Y1, Y1; \
VPERMQ_0x39_Y3_Y3; \
VPERMQ_0x4E_Y2_Y2; \
VPERMQ_0x93_Y1_Y1
#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E
#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26
#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E
#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36
#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E
#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n
#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n
#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n
#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n
#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n
#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01
#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01
#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01
#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01
#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01
#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01
#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01
#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01
#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01
#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01
#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8
#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01
// load msg: Y12 = (i0, i1, i2, i3)
// i0, i1, i2, i3 must not be 0
#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \
VMOVQ_SI_X12(i0*8); \
VMOVQ_SI_X11(i2*8); \
VPINSRQ_1_SI_X12(i1*8); \
VPINSRQ_1_SI_X11(i3*8); \
VINSERTI128 $1, X11, Y12, Y12
// load msg: Y13 = (i0, i1, i2, i3)
// i0, i1, i2, i3 must not be 0
#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \
VMOVQ_SI_X13(i0*8); \
VMOVQ_SI_X11(i2*8); \
VPINSRQ_1_SI_X13(i1*8); \
VPINSRQ_1_SI_X11(i3*8); \
VINSERTI128 $1, X11, Y13, Y13
// load msg: Y14 = (i0, i1, i2, i3)
// i0, i1, i2, i3 must not be 0
#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \
VMOVQ_SI_X14(i0*8); \
VMOVQ_SI_X11(i2*8); \
VPINSRQ_1_SI_X14(i1*8); \
VPINSRQ_1_SI_X11(i3*8); \
VINSERTI128 $1, X11, Y14, Y14
// load msg: Y15 = (i0, i1, i2, i3)
// i0, i1, i2, i3 must not be 0
#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \
VMOVQ_SI_X15(i0*8); \
VMOVQ_SI_X11(i2*8); \
VPINSRQ_1_SI_X15(i1*8); \
VPINSRQ_1_SI_X11(i3*8); \
VINSERTI128 $1, X11, Y15, Y15
#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \
VMOVQ_SI_X12_0; \
VMOVQ_SI_X11(4*8); \
VPINSRQ_1_SI_X12(2*8); \
VPINSRQ_1_SI_X11(6*8); \
VINSERTI128 $1, X11, Y12, Y12; \
LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \
LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \
LOAD_MSG_AVX2_Y15(9, 11, 13, 15)
#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \
LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \
LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \
VMOVQ_SI_X11(11*8); \
VPSHUFD $0x4E, 0*8(SI), X14; \
VPINSRQ_1_SI_X11(5*8); \
VINSERTI128 $1, X11, Y14, Y14; \
LOAD_MSG_AVX2_Y15(12, 2, 7, 3)
#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \
VMOVQ_SI_X11(5*8); \
VMOVDQU 11*8(SI), X12; \
VPINSRQ_1_SI_X11(15*8); \
VINSERTI128 $1, X11, Y12, Y12; \
VMOVQ_SI_X13(8*8); \
VMOVQ_SI_X11(2*8); \
VPINSRQ_1_SI_X13_0; \
VPINSRQ_1_SI_X11(13*8); \
VINSERTI128 $1, X11, Y13, Y13; \
LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \
LOAD_MSG_AVX2_Y15(14, 6, 1, 4)
#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \
LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \
LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \
LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \
VMOVQ_SI_X15(6*8); \
VMOVQ_SI_X11_0; \
VPINSRQ_1_SI_X15(10*8); \
VPINSRQ_1_SI_X11(8*8); \
VINSERTI128 $1, X11, Y15, Y15
#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \
LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \
VMOVQ_SI_X13_0; \
VMOVQ_SI_X11(4*8); \
VPINSRQ_1_SI_X13(7*8); \
VPINSRQ_1_SI_X11(15*8); \
VINSERTI128 $1, X11, Y13, Y13; \
LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \
LOAD_MSG_AVX2_Y15(1, 12, 8, 13)
#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \
VMOVQ_SI_X12(2*8); \
VMOVQ_SI_X11_0; \
VPINSRQ_1_SI_X12(6*8); \
VPINSRQ_1_SI_X11(8*8); \
VINSERTI128 $1, X11, Y12, Y12; \
LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \
LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \
LOAD_MSG_AVX2_Y15(13, 5, 14, 9)
#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \
LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \
LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \
VMOVQ_SI_X14_0; \
VPSHUFD $0x4E, 8*8(SI), X11; \
VPINSRQ_1_SI_X14(6*8); \
VINSERTI128 $1, X11, Y14, Y14; \
LOAD_MSG_AVX2_Y15(7, 3, 2, 11)
#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \
LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \
LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \
LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \
VMOVQ_SI_X15_0; \
VMOVQ_SI_X11(6*8); \
VPINSRQ_1_SI_X15(4*8); \
VPINSRQ_1_SI_X11(10*8); \
VINSERTI128 $1, X11, Y15, Y15
#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \
VMOVQ_SI_X12(6*8); \
VMOVQ_SI_X11(11*8); \
VPINSRQ_1_SI_X12(14*8); \
VPINSRQ_1_SI_X11_0; \
VINSERTI128 $1, X11, Y12, Y12; \
LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \
VMOVQ_SI_X11(1*8); \
VMOVDQU 12*8(SI), X14; \
VPINSRQ_1_SI_X11(10*8); \
VINSERTI128 $1, X11, Y14, Y14; \
VMOVQ_SI_X15(2*8); \
VMOVDQU 4*8(SI), X11; \
VPINSRQ_1_SI_X15(7*8); \
VINSERTI128 $1, X11, Y15, Y15
#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \
LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \
VMOVQ_SI_X13(2*8); \
VPSHUFD $0x4E, 5*8(SI), X11; \
VPINSRQ_1_SI_X13(4*8); \
VINSERTI128 $1, X11, Y13, Y13; \
LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \
VMOVQ_SI_X15(11*8); \
VMOVQ_SI_X11(12*8); \
VPINSRQ_1_SI_X15(14*8); \
VPINSRQ_1_SI_X11_0; \
VINSERTI128 $1, X11, Y15, Y15
// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, DX
MOVQ SP, R9
ADDQ $31, R9
ANDQ $~31, R9
MOVQ R9, SP
MOVQ CX, 16(SP)
XORQ CX, CX
MOVQ CX, 24(SP)
VMOVDQU ·AVX2_c40<>(SB), Y4
VMOVDQU ·AVX2_c48<>(SB), Y5
VMOVDQU 0(AX), Y8
VMOVDQU 32(AX), Y9
VMOVDQU ·AVX2_iv0<>(SB), Y6
VMOVDQU ·AVX2_iv1<>(SB), Y7
MOVQ 0(BX), R8
MOVQ 8(BX), R9
MOVQ R9, 8(SP)
loop:
ADDQ $128, R8
MOVQ R8, 0(SP)
CMPQ R8, $128
JGE noinc
INCQ R9
MOVQ R9, 8(SP)
noinc:
VMOVDQA Y8, Y0
VMOVDQA Y9, Y1
VMOVDQA Y6, Y2
VPXOR 0(SP), Y7, Y3
LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15()
VMOVDQA Y12, 32(SP)
VMOVDQA Y13, 64(SP)
VMOVDQA Y14, 96(SP)
VMOVDQA Y15, 128(SP)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3()
VMOVDQA Y12, 160(SP)
VMOVDQA Y13, 192(SP)
VMOVDQA Y14, 224(SP)
VMOVDQA Y15, 256(SP)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0()
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
ROUND_AVX2(32(SP), 64(SP), 96(SP), 128(SP), Y10, Y4, Y5)
ROUND_AVX2(160(SP), 192(SP), 224(SP), 256(SP), Y10, Y4, Y5)
VPXOR Y0, Y8, Y8
VPXOR Y1, Y9, Y9
VPXOR Y2, Y8, Y8
VPXOR Y3, Y9, Y9
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
VMOVDQU Y8, 0(AX)
VMOVDQU Y9, 32(AX)
VZEROUPPER
MOVQ DX, SP
RET
#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA
#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB
#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF
#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD
#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE
#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7
#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF
#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7
#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF
#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7
#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7
#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF
#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF
#define SHUFFLE_AVX() \
VMOVDQA X6, X13; \
VMOVDQA X2, X14; \
VMOVDQA X4, X6; \
VPUNPCKLQDQ_X13_X13_X15; \
VMOVDQA X5, X4; \
VMOVDQA X6, X5; \
VPUNPCKHQDQ_X15_X7_X6; \
VPUNPCKLQDQ_X7_X7_X15; \
VPUNPCKHQDQ_X15_X13_X7; \
VPUNPCKLQDQ_X3_X3_X15; \
VPUNPCKHQDQ_X15_X2_X2; \
VPUNPCKLQDQ_X14_X14_X15; \
VPUNPCKHQDQ_X15_X3_X3; \
#define SHUFFLE_AVX_INV() \
VMOVDQA X2, X13; \
VMOVDQA X4, X14; \
VPUNPCKLQDQ_X2_X2_X15; \
VMOVDQA X5, X4; \
VPUNPCKHQDQ_X15_X3_X2; \
VMOVDQA X14, X5; \
VPUNPCKLQDQ_X3_X3_X15; \
VMOVDQA X6, X14; \
VPUNPCKHQDQ_X15_X13_X3; \
VPUNPCKLQDQ_X7_X7_X15; \
VPUNPCKHQDQ_X15_X6_X6; \
VPUNPCKLQDQ_X14_X14_X15; \
VPUNPCKHQDQ_X15_X7_X7; \
#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
VPADDQ m0, v0, v0; \
VPADDQ v2, v0, v0; \
VPADDQ m1, v1, v1; \
VPADDQ v3, v1, v1; \
VPXOR v0, v6, v6; \
VPXOR v1, v7, v7; \
VPSHUFD $-79, v6, v6; \
VPSHUFD $-79, v7, v7; \
VPADDQ v6, v4, v4; \
VPADDQ v7, v5, v5; \
VPXOR v4, v2, v2; \
VPXOR v5, v3, v3; \
VPSHUFB c40, v2, v2; \
VPSHUFB c40, v3, v3; \
VPADDQ m2, v0, v0; \
VPADDQ v2, v0, v0; \
VPADDQ m3, v1, v1; \
VPADDQ v3, v1, v1; \
VPXOR v0, v6, v6; \
VPXOR v1, v7, v7; \
VPSHUFB c48, v6, v6; \
VPSHUFB c48, v7, v7; \
VPADDQ v6, v4, v4; \
VPADDQ v7, v5, v5; \
VPXOR v4, v2, v2; \
VPXOR v5, v3, v3; \
VPADDQ v2, v2, t0; \
VPSRLQ $63, v2, v2; \
VPXOR t0, v2, v2; \
VPADDQ v3, v3, t0; \
VPSRLQ $63, v3, v3; \
VPXOR t0, v3, v3
// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7)
// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0
#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \
VMOVQ_SI_X12(i0*8); \
VMOVQ_SI_X13(i2*8); \
VMOVQ_SI_X14(i4*8); \
VMOVQ_SI_X15(i6*8); \
VPINSRQ_1_SI_X12(i1*8); \
VPINSRQ_1_SI_X13(i3*8); \
VPINSRQ_1_SI_X14(i5*8); \
VPINSRQ_1_SI_X15(i7*8)
// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7)
#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \
VMOVQ_SI_X12_0; \
VMOVQ_SI_X13(4*8); \
VMOVQ_SI_X14(1*8); \
VMOVQ_SI_X15(5*8); \
VPINSRQ_1_SI_X12(2*8); \
VPINSRQ_1_SI_X13(6*8); \
VPINSRQ_1_SI_X14(3*8); \
VPINSRQ_1_SI_X15(7*8)
// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3)
#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \
VPSHUFD $0x4E, 0*8(SI), X12; \
VMOVQ_SI_X13(11*8); \
VMOVQ_SI_X14(12*8); \
VMOVQ_SI_X15(7*8); \
VPINSRQ_1_SI_X13(5*8); \
VPINSRQ_1_SI_X14(2*8); \
VPINSRQ_1_SI_X15(3*8)
// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13)
#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \
VMOVDQU 11*8(SI), X12; \
VMOVQ_SI_X13(5*8); \
VMOVQ_SI_X14(8*8); \
VMOVQ_SI_X15(2*8); \
VPINSRQ_1_SI_X13(15*8); \
VPINSRQ_1_SI_X14_0; \
VPINSRQ_1_SI_X15(13*8)
// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8)
#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \
VMOVQ_SI_X12(2*8); \
VMOVQ_SI_X13(4*8); \
VMOVQ_SI_X14(6*8); \
VMOVQ_SI_X15_0; \
VPINSRQ_1_SI_X12(5*8); \
VPINSRQ_1_SI_X13(15*8); \
VPINSRQ_1_SI_X14(10*8); \
VPINSRQ_1_SI_X15(8*8)
// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15)
#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \
VMOVQ_SI_X12(9*8); \
VMOVQ_SI_X13(2*8); \
VMOVQ_SI_X14_0; \
VMOVQ_SI_X15(4*8); \
VPINSRQ_1_SI_X12(5*8); \
VPINSRQ_1_SI_X13(10*8); \
VPINSRQ_1_SI_X14(7*8); \
VPINSRQ_1_SI_X15(15*8)
// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3)
#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \
VMOVQ_SI_X12(2*8); \
VMOVQ_SI_X13_0; \
VMOVQ_SI_X14(12*8); \
VMOVQ_SI_X15(11*8); \
VPINSRQ_1_SI_X12(6*8); \
VPINSRQ_1_SI_X13(8*8); \
VPINSRQ_1_SI_X14(10*8); \
VPINSRQ_1_SI_X15(3*8)
// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11)
#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \
MOVQ 0*8(SI), X12; \
VPSHUFD $0x4E, 8*8(SI), X13; \
MOVQ 7*8(SI), X14; \
MOVQ 2*8(SI), X15; \
VPINSRQ_1_SI_X12(6*8); \
VPINSRQ_1_SI_X14(3*8); \
VPINSRQ_1_SI_X15(11*8)
// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8)
#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \
MOVQ 6*8(SI), X12; \
MOVQ 11*8(SI), X13; \
MOVQ 15*8(SI), X14; \
MOVQ 3*8(SI), X15; \
VPINSRQ_1_SI_X12(14*8); \
VPINSRQ_1_SI_X13_0; \
VPINSRQ_1_SI_X14(9*8); \
VPINSRQ_1_SI_X15(8*8)
// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10)
#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \
MOVQ 5*8(SI), X12; \
MOVQ 8*8(SI), X13; \
MOVQ 0*8(SI), X14; \
MOVQ 6*8(SI), X15; \
VPINSRQ_1_SI_X12(15*8); \
VPINSRQ_1_SI_X13(2*8); \
VPINSRQ_1_SI_X14(4*8); \
VPINSRQ_1_SI_X15(10*8)
// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5)
#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \
VMOVDQU 12*8(SI), X12; \
MOVQ 1*8(SI), X13; \
MOVQ 2*8(SI), X14; \
VPINSRQ_1_SI_X13(10*8); \
VPINSRQ_1_SI_X14(7*8); \
VMOVDQU 4*8(SI), X15
// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0)
#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \
MOVQ 15*8(SI), X12; \
MOVQ 3*8(SI), X13; \
MOVQ 11*8(SI), X14; \
MOVQ 12*8(SI), X15; \
VPINSRQ_1_SI_X12(9*8); \
VPINSRQ_1_SI_X13(13*8); \
VPINSRQ_1_SI_X14(14*8); \
VPINSRQ_1_SI_X15_0
// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, BP
MOVQ SP, R9
ADDQ $15, R9
ANDQ $~15, R9
MOVQ R9, SP
VMOVDQU ·AVX_c40<>(SB), X0
VMOVDQU ·AVX_c48<>(SB), X1
VMOVDQA X0, X8
VMOVDQA X1, X9
VMOVDQU ·AVX_iv3<>(SB), X0
VMOVDQA X0, 0(SP)
XORQ CX, 0(SP) // 0(SP) = ·AVX_iv3 ^ (CX || 0)
VMOVDQU 0(AX), X10
VMOVDQU 16(AX), X11
VMOVDQU 32(AX), X2
VMOVDQU 48(AX), X3
MOVQ 0(BX), R8
MOVQ 8(BX), R9
loop:
ADDQ $128, R8
CMPQ R8, $128
JGE noinc
INCQ R9
noinc:
VMOVQ_R8_X15
VPINSRQ_1_R9_X15
VMOVDQA X10, X0
VMOVDQA X11, X1
VMOVDQU ·AVX_iv0<>(SB), X4
VMOVDQU ·AVX_iv1<>(SB), X5
VMOVDQU ·AVX_iv2<>(SB), X6
VPXOR X15, X6, X6
VMOVDQA 0(SP), X7
LOAD_MSG_AVX_0_2_4_6_1_3_5_7()
VMOVDQA X12, 16(SP)
VMOVDQA X13, 32(SP)
VMOVDQA X14, 48(SP)
VMOVDQA X15, 64(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15)
VMOVDQA X12, 80(SP)
VMOVDQA X13, 96(SP)
VMOVDQA X14, 112(SP)
VMOVDQA X15, 128(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6)
VMOVDQA X12, 144(SP)
VMOVDQA X13, 160(SP)
VMOVDQA X14, 176(SP)
VMOVDQA X15, 192(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_1_0_11_5_12_2_7_3()
VMOVDQA X12, 208(SP)
VMOVDQA X13, 224(SP)
VMOVDQA X14, 240(SP)
VMOVDQA X15, 256(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX_11_12_5_15_8_0_2_13()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_2_5_4_15_6_10_0_8()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX_9_5_2_10_0_7_4_15()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX_2_6_0_8_12_10_11_3()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_0_6_9_8_7_3_2_11()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_5_15_8_2_0_4_6_10()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX_6_14_11_0_15_9_3_8()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_12_13_1_10_2_7_4_5()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX()
LOAD_MSG_AVX_15_9_3_13_11_14_12_0()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
SHUFFLE_AVX_INV()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X15, X8, X9)
SHUFFLE_AVX()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X15, X8, X9)
SHUFFLE_AVX_INV()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X15, X8, X9)
SHUFFLE_AVX()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X15, X8, X9)
SHUFFLE_AVX_INV()
VMOVDQU 32(AX), X14
VMOVDQU 48(AX), X15
VPXOR X0, X10, X10
VPXOR X1, X11, X11
VPXOR X2, X14, X14
VPXOR X3, X15, X15
VPXOR X4, X10, X10
VPXOR X5, X11, X11
VPXOR X6, X14, X2
VPXOR X7, X15, X3
VMOVDQU X2, 32(AX)
VMOVDQU X3, 48(AX)
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
VMOVDQU X10, 0(AX)
VMOVDQU X11, 16(AX)
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
VZEROUPPER
MOVQ BP, SP
RET
// func supportsAVX2() bool
TEXT ·supportsAVX2(SB), 4, $0-1
MOVQ runtime·support_avx2(SB), AX
MOVB AX, ret+0(FP)
RET
// func supportsAVX() bool
TEXT ·supportsAVX(SB), 4, $0-1
MOVQ runtime·support_avx(SB), AX
MOVB AX, ret+0(FP)
RET

25
vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go generated vendored Normal file
View file

@ -0,0 +1,25 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.7,amd64,!gccgo,!appengine
package blake2b
func init() {
useSSE4 = supportsSSE4()
}
//go:noescape
func supportsSSE4() bool
//go:noescape
func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
if useSSE4 {
hashBlocksSSE4(h, c, flag, blocks)
} else {
hashBlocksGeneric(h, c, flag, blocks)
}
}

290
vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s generated vendored Normal file
View file

@ -0,0 +1,290 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!gccgo,!appengine
#include "textflag.h"
DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
MOVO v4, t1; \
MOVO v5, v4; \
MOVO t1, v5; \
MOVO v6, t1; \
PUNPCKLQDQ v6, t2; \
PUNPCKHQDQ v7, v6; \
PUNPCKHQDQ t2, v6; \
PUNPCKLQDQ v7, t2; \
MOVO t1, v7; \
MOVO v2, t1; \
PUNPCKHQDQ t2, v7; \
PUNPCKLQDQ v3, t2; \
PUNPCKHQDQ t2, v2; \
PUNPCKLQDQ t1, t2; \
PUNPCKHQDQ t2, v3
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
MOVO v4, t1; \
MOVO v5, v4; \
MOVO t1, v5; \
MOVO v2, t1; \
PUNPCKLQDQ v2, t2; \
PUNPCKHQDQ v3, v2; \
PUNPCKHQDQ t2, v2; \
PUNPCKLQDQ v3, t2; \
MOVO t1, v3; \
MOVO v6, t1; \
PUNPCKHQDQ t2, v3; \
PUNPCKLQDQ v7, t2; \
PUNPCKHQDQ t2, v6; \
PUNPCKLQDQ t1, t2; \
PUNPCKHQDQ t2, v7
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
PADDQ m0, v0; \
PADDQ m1, v1; \
PADDQ v2, v0; \
PADDQ v3, v1; \
PXOR v0, v6; \
PXOR v1, v7; \
PSHUFD $0xB1, v6, v6; \
PSHUFD $0xB1, v7, v7; \
PADDQ v6, v4; \
PADDQ v7, v5; \
PXOR v4, v2; \
PXOR v5, v3; \
PSHUFB c40, v2; \
PSHUFB c40, v3; \
PADDQ m2, v0; \
PADDQ m3, v1; \
PADDQ v2, v0; \
PADDQ v3, v1; \
PXOR v0, v6; \
PXOR v1, v7; \
PSHUFB c48, v6; \
PSHUFB c48, v7; \
PADDQ v6, v4; \
PADDQ v7, v5; \
PXOR v4, v2; \
PXOR v5, v3; \
MOVOU v2, t0; \
PADDQ v2, t0; \
PSRLQ $63, v2; \
PXOR t0, v2; \
MOVOU v3, t0; \
PADDQ v3, t0; \
PSRLQ $63, v3; \
PXOR t0, v3
#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
MOVQ i0*8(src), m0; \
PINSRQ $1, i1*8(src), m0; \
MOVQ i2*8(src), m1; \
PINSRQ $1, i3*8(src), m1; \
MOVQ i4*8(src), m2; \
PINSRQ $1, i5*8(src), m2; \
MOVQ i6*8(src), m3; \
PINSRQ $1, i7*8(src), m3
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, BP
MOVQ SP, R9
ADDQ $15, R9
ANDQ $~15, R9
MOVQ R9, SP
MOVOU ·iv3<>(SB), X0
MOVO X0, 0(SP)
XORQ CX, 0(SP) // 0(SP) = ·iv3 ^ (CX || 0)
MOVOU ·c40<>(SB), X13
MOVOU ·c48<>(SB), X14
MOVOU 0(AX), X12
MOVOU 16(AX), X15
MOVQ 0(BX), R8
MOVQ 8(BX), R9
loop:
ADDQ $128, R8
CMPQ R8, $128
JGE noinc
INCQ R9
noinc:
MOVQ R8, X8
PINSRQ $1, R9, X8
MOVO X12, X0
MOVO X15, X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU ·iv0<>(SB), X4
MOVOU ·iv1<>(SB), X5
MOVOU ·iv2<>(SB), X6
PXOR X8, X6
MOVO 0(SP), X7
LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
MOVO X8, 16(SP)
MOVO X9, 32(SP)
MOVO X10, 48(SP)
MOVO X11, 64(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
MOVO X8, 80(SP)
MOVO X9, 96(SP)
MOVO X10, 112(SP)
MOVO X11, 128(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
MOVO X8, 144(SP)
MOVO X9, 160(SP)
MOVO X10, 176(SP)
MOVO X11, 192(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
MOVO X8, 208(SP)
MOVO X9, 224(SP)
MOVO X10, 240(SP)
MOVO X11, 256(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
MOVOU 32(AX), X10
MOVOU 48(AX), X11
PXOR X0, X12
PXOR X1, X15
PXOR X2, X10
PXOR X3, X11
PXOR X4, X12
PXOR X5, X15
PXOR X6, X10
PXOR X7, X11
MOVOU X10, 32(AX)
MOVOU X11, 48(AX)
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
MOVOU X12, 0(AX)
MOVOU X15, 16(AX)
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
MOVQ BP, SP
RET
// func supportsSSE4() bool
TEXT ·supportsSSE4(SB), 4, $0-1
MOVL $1, AX
CPUID
SHRL $19, CX // Bit 19 indicates SSE4 support
ANDL $1, CX // CX != 0 if support SSE4
MOVB CX, ret+0(FP)
RET

179
vendor/golang.org/x/crypto/blake2b/blake2b_generic.go generated vendored Normal file
View file

@ -0,0 +1,179 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blake2b
import "encoding/binary"
// the precomputed values for BLAKE2b
// there are 12 16-byte arrays - one for each round
// the entries are calculated from the sigma constants.
var precomputed = [12][16]byte{
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second
}
func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
var m [16]uint64
c0, c1 := c[0], c[1]
for i := 0; i < len(blocks); {
c0 += BlockSize
if c0 < BlockSize {
c1++
}
v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]
v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]
v12 ^= c0
v13 ^= c1
v14 ^= flag
for j := range m {
m[j] = binary.LittleEndian.Uint64(blocks[i:])
i += 8
}
for j := range precomputed {
s := &(precomputed[j])
v0 += m[s[0]]
v0 += v4
v12 ^= v0
v12 = v12<<(64-32) | v12>>32
v8 += v12
v4 ^= v8
v4 = v4<<(64-24) | v4>>24
v1 += m[s[1]]
v1 += v5
v13 ^= v1
v13 = v13<<(64-32) | v13>>32
v9 += v13
v5 ^= v9
v5 = v5<<(64-24) | v5>>24
v2 += m[s[2]]
v2 += v6
v14 ^= v2
v14 = v14<<(64-32) | v14>>32
v10 += v14
v6 ^= v10
v6 = v6<<(64-24) | v6>>24
v3 += m[s[3]]
v3 += v7
v15 ^= v3
v15 = v15<<(64-32) | v15>>32
v11 += v15
v7 ^= v11
v7 = v7<<(64-24) | v7>>24
v0 += m[s[4]]
v0 += v4
v12 ^= v0
v12 = v12<<(64-16) | v12>>16
v8 += v12
v4 ^= v8
v4 = v4<<(64-63) | v4>>63
v1 += m[s[5]]
v1 += v5
v13 ^= v1
v13 = v13<<(64-16) | v13>>16
v9 += v13
v5 ^= v9
v5 = v5<<(64-63) | v5>>63
v2 += m[s[6]]
v2 += v6
v14 ^= v2
v14 = v14<<(64-16) | v14>>16
v10 += v14
v6 ^= v10
v6 = v6<<(64-63) | v6>>63
v3 += m[s[7]]
v3 += v7
v15 ^= v3
v15 = v15<<(64-16) | v15>>16
v11 += v15
v7 ^= v11
v7 = v7<<(64-63) | v7>>63
v0 += m[s[8]]
v0 += v5
v15 ^= v0
v15 = v15<<(64-32) | v15>>32
v10 += v15
v5 ^= v10
v5 = v5<<(64-24) | v5>>24
v1 += m[s[9]]
v1 += v6
v12 ^= v1
v12 = v12<<(64-32) | v12>>32
v11 += v12
v6 ^= v11
v6 = v6<<(64-24) | v6>>24
v2 += m[s[10]]
v2 += v7
v13 ^= v2
v13 = v13<<(64-32) | v13>>32
v8 += v13
v7 ^= v8
v7 = v7<<(64-24) | v7>>24
v3 += m[s[11]]
v3 += v4
v14 ^= v3
v14 = v14<<(64-32) | v14>>32
v9 += v14
v4 ^= v9
v4 = v4<<(64-24) | v4>>24
v0 += m[s[12]]
v0 += v5
v15 ^= v0
v15 = v15<<(64-16) | v15>>16
v10 += v15
v5 ^= v10
v5 = v5<<(64-63) | v5>>63
v1 += m[s[13]]
v1 += v6
v12 ^= v1
v12 = v12<<(64-16) | v12>>16
v11 += v12
v6 ^= v11
v6 = v6<<(64-63) | v6>>63
v2 += m[s[14]]
v2 += v7
v13 ^= v2
v13 = v13<<(64-16) | v13>>16
v8 += v13
v7 ^= v8
v7 = v7<<(64-63) | v7>>63
v3 += m[s[15]]
v3 += v4
v14 ^= v3
v14 = v14<<(64-16) | v14>>16
v9 += v14
v4 ^= v9
v4 = v4<<(64-63) | v4>>63
}
h[0] ^= v0 ^ v8
h[1] ^= v1 ^ v9
h[2] ^= v2 ^ v10
h[3] ^= v3 ^ v11
h[4] ^= v4 ^ v12
h[5] ^= v5 ^ v13
h[6] ^= v6 ^ v14
h[7] ^= v7 ^ v15
}
c[0], c[1] = c0, c1
}

11
vendor/golang.org/x/crypto/blake2b/blake2b_ref.go generated vendored Normal file
View file

@ -0,0 +1,11 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 appengine gccgo
package blake2b
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
hashBlocksGeneric(h, c, flag, blocks)
}

448
vendor/golang.org/x/crypto/blake2b/blake2b_test.go generated vendored Normal file
View file

@ -0,0 +1,448 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blake2b
import (
"bytes"
"encoding/hex"
"fmt"
"hash"
"testing"
)
func fromHex(s string) []byte {
b, err := hex.DecodeString(s)
if err != nil {
panic(err)
}
return b
}
func TestHashes(t *testing.T) {
defer func(sse4, avx, avx2 bool) {
useSSE4, useAVX, useAVX2 = sse4, useAVX, avx2
}(useSSE4, useAVX, useAVX2)
if useAVX2 {
t.Log("AVX2 version")
testHashes(t)
useAVX2 = false
}
if useAVX {
t.Log("AVX version")
testHashes(t)
useAVX = false
}
if useSSE4 {
t.Log("SSE4 version")
testHashes(t)
useSSE4 = false
}
t.Log("generic version")
testHashes(t)
}
func testHashes(t *testing.T) {
key, _ := hex.DecodeString("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f")
input := make([]byte, 255)
for i := range input {
input[i] = byte(i)
}
for i, expectedHex := range hashes {
h, err := New512(key)
if err != nil {
t.Fatalf("#%d: error from New512: %v", i, err)
}
h.Write(input[:i])
sum := h.Sum(nil)
if gotHex := fmt.Sprintf("%x", sum); gotHex != expectedHex {
t.Fatalf("#%d (single write): got %s, wanted %s", i, gotHex, expectedHex)
}
h.Reset()
for j := 0; j < i; j++ {
h.Write(input[j : j+1])
}
sum = h.Sum(sum[:0])
if gotHex := fmt.Sprintf("%x", sum); gotHex != expectedHex {
t.Fatalf("#%d (byte-by-byte): got %s, wanted %s", i, gotHex, expectedHex)
}
}
}
func generateSequence(out []byte, seed uint32) {
a := 0xDEAD4BAD * seed // prime
b := uint32(1)
for i := range out { // fill the buf
a, b = b, a+b
out[i] = byte(b >> 24)
}
}
func computeMAC(msg []byte, hashSize int, key []byte) (sum []byte) {
var h hash.Hash
switch hashSize {
case Size:
h, _ = New512(key)
case Size384:
h, _ = New384(key)
case Size256:
h, _ = New256(key)
case 20:
h, _ = newDigest(20, key)
default:
panic("unexpected hashSize")
}
h.Write(msg)
return h.Sum(sum)
}
func computeHash(msg []byte, hashSize int) (sum []byte) {
switch hashSize {
case Size:
hash := Sum512(msg)
return hash[:]
case Size384:
hash := Sum384(msg)
return hash[:]
case Size256:
hash := Sum256(msg)
return hash[:]
case 20:
var hash [64]byte
checkSum(&hash, 20, msg)
return hash[:20]
default:
panic("unexpected hashSize")
}
}
// Test function from RFC 7693.
func TestSelfTest(t *testing.T) {
hashLens := [4]int{20, 32, 48, 64}
msgLens := [6]int{0, 3, 128, 129, 255, 1024}
msg := make([]byte, 1024)
key := make([]byte, 64)
h, _ := New256(nil)
for _, hashSize := range hashLens {
for _, msgLength := range msgLens {
generateSequence(msg[:msgLength], uint32(msgLength)) // unkeyed hash
md := computeHash(msg[:msgLength], hashSize)
h.Write(md)
generateSequence(key[:], uint32(hashSize)) // keyed hash
md = computeMAC(msg[:msgLength], hashSize, key[:hashSize])
h.Write(md)
}
}
sum := h.Sum(nil)
expected := [32]byte{
0xc2, 0x3a, 0x78, 0x00, 0xd9, 0x81, 0x23, 0xbd,
0x10, 0xf5, 0x06, 0xc6, 0x1e, 0x29, 0xda, 0x56,
0x03, 0xd7, 0x63, 0xb8, 0xbb, 0xad, 0x2e, 0x73,
0x7f, 0x5e, 0x76, 0x5a, 0x7b, 0xcc, 0xd4, 0x75,
}
if !bytes.Equal(sum, expected[:]) {
t.Fatalf("got %x, wanted %x", sum, expected)
}
}
// Benchmarks
func benchmarkSum(b *testing.B, size int) {
data := make([]byte, size)
b.SetBytes(int64(size))
b.ResetTimer()
for i := 0; i < b.N; i++ {
Sum512(data)
}
}
func benchmarkWrite(b *testing.B, size int) {
data := make([]byte, size)
h, _ := New512(nil)
b.SetBytes(int64(size))
b.ResetTimer()
for i := 0; i < b.N; i++ {
h.Write(data)
}
}
func BenchmarkWrite128(b *testing.B) { benchmarkWrite(b, 128) }
func BenchmarkWrite1K(b *testing.B) { benchmarkWrite(b, 1024) }
func BenchmarkSum128(b *testing.B) { benchmarkSum(b, 128) }
func BenchmarkSum1K(b *testing.B) { benchmarkSum(b, 1024) }
// These values were taken from https://blake2.net/blake2b-test.txt.
var hashes = []string{
"10ebb67700b1868efb4417987acf4690ae9d972fb7a590c2f02871799aaa4786b5e996e8f0f4eb981fc214b005f42d2ff4233499391653df7aefcbc13fc51568",
"961f6dd1e4dd30f63901690c512e78e4b45e4742ed197c3c5e45c549fd25f2e4187b0bc9fe30492b16b0d0bc4ef9b0f34c7003fac09a5ef1532e69430234cebd",
"da2cfbe2d8409a0f38026113884f84b50156371ae304c4430173d08a99d9fb1b983164a3770706d537f49e0c916d9f32b95cc37a95b99d857436f0232c88a965",
"33d0825dddf7ada99b0e7e307104ad07ca9cfd9692214f1561356315e784f3e5a17e364ae9dbb14cb2036df932b77f4b292761365fb328de7afdc6d8998f5fc1",
"beaa5a3d08f3807143cf621d95cd690514d0b49efff9c91d24b59241ec0eefa5f60196d407048bba8d2146828ebcb0488d8842fd56bb4f6df8e19c4b4daab8ac",
"098084b51fd13deae5f4320de94a688ee07baea2800486689a8636117b46c1f4c1f6af7f74ae7c857600456a58a3af251dc4723a64cc7c0a5ab6d9cac91c20bb",
"6044540d560853eb1c57df0077dd381094781cdb9073e5b1b3d3f6c7829e12066bbaca96d989a690de72ca3133a83652ba284a6d62942b271ffa2620c9e75b1f",
"7a8cfe9b90f75f7ecb3acc053aaed6193112b6f6a4aeeb3f65d3de541942deb9e2228152a3c4bbbe72fc3b12629528cfbb09fe630f0474339f54abf453e2ed52",
"380beaf6ea7cc9365e270ef0e6f3a64fb902acae51dd5512f84259ad2c91f4bc4108db73192a5bbfb0cbcf71e46c3e21aee1c5e860dc96e8eb0b7b8426e6abe9",
"60fe3c4535e1b59d9a61ea8500bfac41a69dffb1ceadd9aca323e9a625b64da5763bad7226da02b9c8c4f1a5de140ac5a6c1124e4f718ce0b28ea47393aa6637",
"4fe181f54ad63a2983feaaf77d1e7235c2beb17fa328b6d9505bda327df19fc37f02c4b6f0368ce23147313a8e5738b5fa2a95b29de1c7f8264eb77b69f585cd",
"f228773ce3f3a42b5f144d63237a72d99693adb8837d0e112a8a0f8ffff2c362857ac49c11ec740d1500749dac9b1f4548108bf3155794dcc9e4082849e2b85b",
"962452a8455cc56c8511317e3b1f3b2c37df75f588e94325fdd77070359cf63a9ae6e930936fdf8e1e08ffca440cfb72c28f06d89a2151d1c46cd5b268ef8563",
"43d44bfa18768c59896bf7ed1765cb2d14af8c260266039099b25a603e4ddc5039d6ef3a91847d1088d401c0c7e847781a8a590d33a3c6cb4df0fab1c2f22355",
"dcffa9d58c2a4ca2cdbb0c7aa4c4c1d45165190089f4e983bb1c2cab4aaeff1fa2b5ee516fecd780540240bf37e56c8bcca7fab980e1e61c9400d8a9a5b14ac6",
"6fbf31b45ab0c0b8dad1c0f5f4061379912dde5aa922099a030b725c73346c524291adef89d2f6fd8dfcda6d07dad811a9314536c2915ed45da34947e83de34e",
"a0c65bddde8adef57282b04b11e7bc8aab105b99231b750c021f4a735cb1bcfab87553bba3abb0c3e64a0b6955285185a0bd35fb8cfde557329bebb1f629ee93",
"f99d815550558e81eca2f96718aed10d86f3f1cfb675cce06b0eff02f617c5a42c5aa760270f2679da2677c5aeb94f1142277f21c7f79f3c4f0cce4ed8ee62b1",
"95391da8fc7b917a2044b3d6f5374e1ca072b41454d572c7356c05fd4bc1e0f40b8bb8b4a9f6bce9be2c4623c399b0dca0dab05cb7281b71a21b0ebcd9e55670",
"04b9cd3d20d221c09ac86913d3dc63041989a9a1e694f1e639a3ba7e451840f750c2fc191d56ad61f2e7936bc0ac8e094b60caeed878c18799045402d61ceaf9",
"ec0e0ef707e4ed6c0c66f9e089e4954b058030d2dd86398fe84059631f9ee591d9d77375355149178c0cf8f8e7c49ed2a5e4f95488a2247067c208510fadc44c",
"9a37cce273b79c09913677510eaf7688e89b3314d3532fd2764c39de022a2945b5710d13517af8ddc0316624e73bec1ce67df15228302036f330ab0cb4d218dd",
"4cf9bb8fb3d4de8b38b2f262d3c40f46dfe747e8fc0a414c193d9fcf753106ce47a18f172f12e8a2f1c26726545358e5ee28c9e2213a8787aafbc516d2343152",
"64e0c63af9c808fd893137129867fd91939d53f2af04be4fa268006100069b2d69daa5c5d8ed7fddcb2a70eeecdf2b105dd46a1e3b7311728f639ab489326bc9",
"5e9c93158d659b2def06b0c3c7565045542662d6eee8a96a89b78ade09fe8b3dcc096d4fe48815d88d8f82620156602af541955e1f6ca30dce14e254c326b88f",
"7775dff889458dd11aef417276853e21335eb88e4dec9cfb4e9edb49820088551a2ca60339f12066101169f0dfe84b098fddb148d9da6b3d613df263889ad64b",
"f0d2805afbb91f743951351a6d024f9353a23c7ce1fc2b051b3a8b968c233f46f50f806ecb1568ffaa0b60661e334b21dde04f8fa155ac740eeb42e20b60d764",
"86a2af316e7d7754201b942e275364ac12ea8962ab5bd8d7fb276dc5fbffc8f9a28cae4e4867df6780d9b72524160927c855da5b6078e0b554aa91e31cb9ca1d",
"10bdf0caa0802705e706369baf8a3f79d72c0a03a80675a7bbb00be3a45e516424d1ee88efb56f6d5777545ae6e27765c3a8f5e493fc308915638933a1dfee55",
"b01781092b1748459e2e4ec178696627bf4ebafebba774ecf018b79a68aeb84917bf0b84bb79d17b743151144cd66b7b33a4b9e52c76c4e112050ff5385b7f0b",
"c6dbc61dec6eaeac81e3d5f755203c8e220551534a0b2fd105a91889945a638550204f44093dd998c076205dffad703a0e5cd3c7f438a7e634cd59fededb539e",
"eba51acffb4cea31db4b8d87e9bf7dd48fe97b0253ae67aa580f9ac4a9d941f2bea518ee286818cc9f633f2a3b9fb68e594b48cdd6d515bf1d52ba6c85a203a7",
"86221f3ada52037b72224f105d7999231c5e5534d03da9d9c0a12acb68460cd375daf8e24386286f9668f72326dbf99ba094392437d398e95bb8161d717f8991",
"5595e05c13a7ec4dc8f41fb70cb50a71bce17c024ff6de7af618d0cc4e9c32d9570d6d3ea45b86525491030c0d8f2b1836d5778c1ce735c17707df364d054347",
"ce0f4f6aca89590a37fe034dd74dd5fa65eb1cbd0a41508aaddc09351a3cea6d18cb2189c54b700c009f4cbf0521c7ea01be61c5ae09cb54f27bc1b44d658c82",
"7ee80b06a215a3bca970c77cda8761822bc103d44fa4b33f4d07dcb997e36d55298bceae12241b3fa07fa63be5576068da387b8d5859aeab701369848b176d42",
"940a84b6a84d109aab208c024c6ce9647676ba0aaa11f86dbb7018f9fd2220a6d901a9027f9abcf935372727cbf09ebd61a2a2eeb87653e8ecad1bab85dc8327",
"2020b78264a82d9f4151141adba8d44bf20c5ec062eee9b595a11f9e84901bf148f298e0c9f8777dcdbc7cc4670aac356cc2ad8ccb1629f16f6a76bcefbee760",
"d1b897b0e075ba68ab572adf9d9c436663e43eb3d8e62d92fc49c9be214e6f27873fe215a65170e6bea902408a25b49506f47babd07cecf7113ec10c5dd31252",
"b14d0c62abfa469a357177e594c10c194243ed2025ab8aa5ad2fa41ad318e0ff48cd5e60bec07b13634a711d2326e488a985f31e31153399e73088efc86a5c55",
"4169c5cc808d2697dc2a82430dc23e3cd356dc70a94566810502b8d655b39abf9e7f902fe717e0389219859e1945df1af6ada42e4ccda55a197b7100a30c30a1",
"258a4edb113d66c839c8b1c91f15f35ade609f11cd7f8681a4045b9fef7b0b24c82cda06a5f2067b368825e3914e53d6948ede92efd6e8387fa2e537239b5bee",
"79d2d8696d30f30fb34657761171a11e6c3f1e64cbe7bebee159cb95bfaf812b4f411e2f26d9c421dc2c284a3342d823ec293849e42d1e46b0a4ac1e3c86abaa",
"8b9436010dc5dee992ae38aea97f2cd63b946d94fedd2ec9671dcde3bd4ce9564d555c66c15bb2b900df72edb6b891ebcadfeff63c9ea4036a998be7973981e7",
"c8f68e696ed28242bf997f5b3b34959508e42d613810f1e2a435c96ed2ff560c7022f361a9234b9837feee90bf47922ee0fd5f8ddf823718d86d1e16c6090071",
"b02d3eee4860d5868b2c39ce39bfe81011290564dd678c85e8783f29302dfc1399ba95b6b53cd9ebbf400cca1db0ab67e19a325f2d115812d25d00978ad1bca4",
"7693ea73af3ac4dad21ca0d8da85b3118a7d1c6024cfaf557699868217bc0c2f44a199bc6c0edd519798ba05bd5b1b4484346a47c2cadf6bf30b785cc88b2baf",
"a0e5c1c0031c02e48b7f09a5e896ee9aef2f17fc9e18e997d7f6cac7ae316422c2b1e77984e5f3a73cb45deed5d3f84600105e6ee38f2d090c7d0442ea34c46d",
"41daa6adcfdb69f1440c37b596440165c15ada596813e2e22f060fcd551f24dee8e04ba6890387886ceec4a7a0d7fc6b44506392ec3822c0d8c1acfc7d5aebe8",
"14d4d40d5984d84c5cf7523b7798b254e275a3a8cc0a1bd06ebc0bee726856acc3cbf516ff667cda2058ad5c3412254460a82c92187041363cc77a4dc215e487",
"d0e7a1e2b9a447fee83e2277e9ff8010c2f375ae12fa7aaa8ca5a6317868a26a367a0b69fbc1cf32a55d34eb370663016f3d2110230eba754028a56f54acf57c",
"e771aa8db5a3e043e8178f39a0857ba04a3f18e4aa05743cf8d222b0b095825350ba422f63382a23d92e4149074e816a36c1cd28284d146267940b31f8818ea2",
"feb4fd6f9e87a56bef398b3284d2bda5b5b0e166583a66b61e538457ff0584872c21a32962b9928ffab58de4af2edd4e15d8b35570523207ff4e2a5aa7754caa",
"462f17bf005fb1c1b9e671779f665209ec2873e3e411f98dabf240a1d5ec3f95ce6796b6fc23fe171903b502023467dec7273ff74879b92967a2a43a5a183d33",
"d3338193b64553dbd38d144bea71c5915bb110e2d88180dbc5db364fd6171df317fc7268831b5aef75e4342b2fad8797ba39eddcef80e6ec08159350b1ad696d",
"e1590d585a3d39f7cb599abd479070966409a6846d4377acf4471d065d5db94129cc9be92573b05ed226be1e9b7cb0cabe87918589f80dadd4ef5ef25a93d28e",
"f8f3726ac5a26cc80132493a6fedcb0e60760c09cfc84cad178175986819665e76842d7b9fedf76dddebf5d3f56faaad4477587af21606d396ae570d8e719af2",
"30186055c07949948183c850e9a756cc09937e247d9d928e869e20bafc3cd9721719d34e04a0899b92c736084550186886efba2e790d8be6ebf040b209c439a4",
"f3c4276cb863637712c241c444c5cc1e3554e0fddb174d035819dd83eb700b4ce88df3ab3841ba02085e1a99b4e17310c5341075c0458ba376c95a6818fbb3e2",
"0aa007c4dd9d5832393040a1583c930bca7dc5e77ea53add7e2b3f7c8e231368043520d4a3ef53c969b6bbfd025946f632bd7f765d53c21003b8f983f75e2a6a",
"08e9464720533b23a04ec24f7ae8c103145f765387d738777d3d343477fd1c58db052142cab754ea674378e18766c53542f71970171cc4f81694246b717d7564",
"d37ff7ad297993e7ec21e0f1b4b5ae719cdc83c5db687527f27516cbffa822888a6810ee5c1ca7bfe3321119be1ab7bfa0a502671c8329494df7ad6f522d440f",
"dd9042f6e464dcf86b1262f6accfafbd8cfd902ed3ed89abf78ffa482dbdeeb6969842394c9a1168ae3d481a017842f660002d42447c6b22f7b72f21aae021c9",
"bd965bf31e87d70327536f2a341cebc4768eca275fa05ef98f7f1b71a0351298de006fba73fe6733ed01d75801b4a928e54231b38e38c562b2e33ea1284992fa",
"65676d800617972fbd87e4b9514e1c67402b7a331096d3bfac22f1abb95374abc942f16e9ab0ead33b87c91968a6e509e119ff07787b3ef483e1dcdccf6e3022",
"939fa189699c5d2c81ddd1ffc1fa207c970b6a3685bb29ce1d3e99d42f2f7442da53e95a72907314f4588399a3ff5b0a92beb3f6be2694f9f86ecf2952d5b41c",
"c516541701863f91005f314108ceece3c643e04fc8c42fd2ff556220e616aaa6a48aeb97a84bad74782e8dff96a1a2fa949339d722edcaa32b57067041df88cc",
"987fd6e0d6857c553eaebb3d34970a2c2f6e89a3548f492521722b80a1c21a153892346d2cba6444212d56da9a26e324dccbc0dcde85d4d2ee4399eec5a64e8f",
"ae56deb1c2328d9c4017706bce6e99d41349053ba9d336d677c4c27d9fd50ae6aee17e853154e1f4fe7672346da2eaa31eea53fcf24a22804f11d03da6abfc2b",
"49d6a608c9bde4491870498572ac31aac3fa40938b38a7818f72383eb040ad39532bc06571e13d767e6945ab77c0bdc3b0284253343f9f6c1244ebf2ff0df866",
"da582ad8c5370b4469af862aa6467a2293b2b28bd80ae0e91f425ad3d47249fdf98825cc86f14028c3308c9804c78bfeeeee461444ce243687e1a50522456a1d",
"d5266aa3331194aef852eed86d7b5b2633a0af1c735906f2e13279f14931a9fc3b0eac5ce9245273bd1aa92905abe16278ef7efd47694789a7283b77da3c70f8",
"2962734c28252186a9a1111c732ad4de4506d4b4480916303eb7991d659ccda07a9911914bc75c418ab7a4541757ad054796e26797feaf36e9f6ad43f14b35a4",
"e8b79ec5d06e111bdfafd71e9f5760f00ac8ac5d8bf768f9ff6f08b8f026096b1cc3a4c973333019f1e3553e77da3f98cb9f542e0a90e5f8a940cc58e59844b3",
"dfb320c44f9d41d1efdcc015f08dd5539e526e39c87d509ae6812a969e5431bf4fa7d91ffd03b981e0d544cf72d7b1c0374f8801482e6dea2ef903877eba675e",
"d88675118fdb55a5fb365ac2af1d217bf526ce1ee9c94b2f0090b2c58a06ca58187d7fe57c7bed9d26fca067b4110eefcd9a0a345de872abe20de368001b0745",
"b893f2fc41f7b0dd6e2f6aa2e0370c0cff7df09e3acfcc0e920b6e6fad0ef747c40668417d342b80d2351e8c175f20897a062e9765e6c67b539b6ba8b9170545",
"6c67ec5697accd235c59b486d7b70baeedcbd4aa64ebd4eef3c7eac189561a726250aec4d48cadcafbbe2ce3c16ce2d691a8cce06e8879556d4483ed7165c063",
"f1aa2b044f8f0c638a3f362e677b5d891d6fd2ab0765f6ee1e4987de057ead357883d9b405b9d609eea1b869d97fb16d9b51017c553f3b93c0a1e0f1296fedcd",
"cbaa259572d4aebfc1917acddc582b9f8dfaa928a198ca7acd0f2aa76a134a90252e6298a65b08186a350d5b7626699f8cb721a3ea5921b753ae3a2dce24ba3a",
"fa1549c9796cd4d303dcf452c1fbd5744fd9b9b47003d920b92de34839d07ef2a29ded68f6fc9e6c45e071a2e48bd50c5084e96b657dd0404045a1ddefe282ed",
"5cf2ac897ab444dcb5c8d87c495dbdb34e1838b6b629427caa51702ad0f9688525f13bec503a3c3a2c80a65e0b5715e8afab00ffa56ec455a49a1ad30aa24fcd",
"9aaf80207bace17bb7ab145757d5696bde32406ef22b44292ef65d4519c3bb2ad41a59b62cc3e94b6fa96d32a7faadae28af7d35097219aa3fd8cda31e40c275",
"af88b163402c86745cb650c2988fb95211b94b03ef290eed9662034241fd51cf398f8073e369354c43eae1052f9b63b08191caa138aa54fea889cc7024236897",
"48fa7d64e1ceee27b9864db5ada4b53d00c9bc7626555813d3cd6730ab3cc06ff342d727905e33171bde6e8476e77fb1720861e94b73a2c538d254746285f430",
"0e6fd97a85e904f87bfe85bbeb34f69e1f18105cf4ed4f87aec36c6e8b5f68bd2a6f3dc8a9ecb2b61db4eedb6b2ea10bf9cb0251fb0f8b344abf7f366b6de5ab",
"06622da5787176287fdc8fed440bad187d830099c94e6d04c8e9c954cda70c8bb9e1fc4a6d0baa831b9b78ef6648681a4867a11da93ee36e5e6a37d87fc63f6f",
"1da6772b58fabf9c61f68d412c82f182c0236d7d575ef0b58dd22458d643cd1dfc93b03871c316d8430d312995d4197f0874c99172ba004a01ee295abac24e46",
"3cd2d9320b7b1d5fb9aab951a76023fa667be14a9124e394513918a3f44096ae4904ba0ffc150b63bc7ab1eeb9a6e257e5c8f000a70394a5afd842715de15f29",
"04cdc14f7434e0b4be70cb41db4c779a88eaef6accebcb41f2d42fffe7f32a8e281b5c103a27021d0d08362250753cdf70292195a53a48728ceb5844c2d98bab",
"9071b7a8a075d0095b8fb3ae5113785735ab98e2b52faf91d5b89e44aac5b5d4ebbf91223b0ff4c71905da55342e64655d6ef8c89a4768c3f93a6dc0366b5bc8",
"ebb30240dd96c7bc8d0abe49aa4edcbb4afdc51ff9aaf720d3f9e7fbb0f9c6d6571350501769fc4ebd0b2141247ff400d4fd4be414edf37757bb90a32ac5c65a",
"8532c58bf3c8015d9d1cbe00eef1f5082f8f3632fbe9f1ed4f9dfb1fa79e8283066d77c44c4af943d76b300364aecbd0648c8a8939bd204123f4b56260422dec",
"fe9846d64f7c7708696f840e2d76cb4408b6595c2f81ec6a28a7f2f20cb88cfe6ac0b9e9b8244f08bd7095c350c1d0842f64fb01bb7f532dfcd47371b0aeeb79",
"28f17ea6fb6c42092dc264257e29746321fb5bdaea9873c2a7fa9d8f53818e899e161bc77dfe8090afd82bf2266c5c1bc930a8d1547624439e662ef695f26f24",
"ec6b7d7f030d4850acae3cb615c21dd25206d63e84d1db8d957370737ba0e98467ea0ce274c66199901eaec18a08525715f53bfdb0aacb613d342ebdceeddc3b",
"b403d3691c03b0d3418df327d5860d34bbfcc4519bfbce36bf33b208385fadb9186bc78a76c489d89fd57e7dc75412d23bcd1dae8470ce9274754bb8585b13c5",
"31fc79738b8772b3f55cd8178813b3b52d0db5a419d30ba9495c4b9da0219fac6df8e7c23a811551a62b827f256ecdb8124ac8a6792ccfecc3b3012722e94463",
"bb2039ec287091bcc9642fc90049e73732e02e577e2862b32216ae9bedcd730c4c284ef3968c368b7d37584f97bd4b4dc6ef6127acfe2e6ae2509124e66c8af4",
"f53d68d13f45edfcb9bd415e2831e938350d5380d3432278fc1c0c381fcb7c65c82dafe051d8c8b0d44e0974a0e59ec7bf7ed0459f86e96f329fc79752510fd3",
"8d568c7984f0ecdf7640fbc483b5d8c9f86634f6f43291841b309a350ab9c1137d24066b09da9944bac54d5bb6580d836047aac74ab724b887ebf93d4b32eca9",
"c0b65ce5a96ff774c456cac3b5f2c4cd359b4ff53ef93a3da0778be4900d1e8da1601e769e8f1b02d2a2f8c5b9fa10b44f1c186985468feeb008730283a6657d",
"4900bba6f5fb103ece8ec96ada13a5c3c85488e05551da6b6b33d988e611ec0fe2e3c2aa48ea6ae8986a3a231b223c5d27cec2eadde91ce07981ee652862d1e4",
"c7f5c37c7285f927f76443414d4357ff789647d7a005a5a787e03c346b57f49f21b64fa9cf4b7e45573e23049017567121a9c3d4b2b73ec5e9413577525db45a",
"ec7096330736fdb2d64b5653e7475da746c23a4613a82687a28062d3236364284ac01720ffb406cfe265c0df626a188c9e5963ace5d3d5bb363e32c38c2190a6",
"82e744c75f4649ec52b80771a77d475a3bc091989556960e276a5f9ead92a03f718742cdcfeaee5cb85c44af198adc43a4a428f5f0c2ddb0be36059f06d7df73",
"2834b7a7170f1f5b68559ab78c1050ec21c919740b784a9072f6e5d69f828d70c919c5039fb148e39e2c8a52118378b064ca8d5001cd10a5478387b966715ed6",
"16b4ada883f72f853bb7ef253efcab0c3e2161687ad61543a0d2824f91c1f81347d86be709b16996e17f2dd486927b0288ad38d13063c4a9672c39397d3789b6",
"78d048f3a69d8b54ae0ed63a573ae350d89f7c6cf1f3688930de899afa037697629b314e5cd303aa62feea72a25bf42b304b6c6bcb27fae21c16d925e1fbdac3",
"0f746a48749287ada77a82961f05a4da4abdb7d77b1220f836d09ec814359c0ec0239b8c7b9ff9e02f569d1b301ef67c4612d1de4f730f81c12c40cc063c5caa",
"f0fc859d3bd195fbdc2d591e4cdac15179ec0f1dc821c11df1f0c1d26e6260aaa65b79fafacafd7d3ad61e600f250905f5878c87452897647a35b995bcadc3a3",
"2620f687e8625f6a412460b42e2cef67634208ce10a0cbd4dff7044a41b7880077e9f8dc3b8d1216d3376a21e015b58fb279b521d83f9388c7382c8505590b9b",
"227e3aed8d2cb10b918fcb04f9de3e6d0a57e08476d93759cd7b2ed54a1cbf0239c528fb04bbf288253e601d3bc38b21794afef90b17094a182cac557745e75f",
"1a929901b09c25f27d6b35be7b2f1c4745131fdebca7f3e2451926720434e0db6e74fd693ad29b777dc3355c592a361c4873b01133a57c2e3b7075cbdb86f4fc",
"5fd7968bc2fe34f220b5e3dc5af9571742d73b7d60819f2888b629072b96a9d8ab2d91b82d0a9aaba61bbd39958132fcc4257023d1eca591b3054e2dc81c8200",
"dfcce8cf32870cc6a503eadafc87fd6f78918b9b4d0737db6810be996b5497e7e5cc80e312f61e71ff3e9624436073156403f735f56b0b01845c18f6caf772e6",
"02f7ef3a9ce0fff960f67032b296efca3061f4934d690749f2d01c35c81c14f39a67fa350bc8a0359bf1724bffc3bca6d7c7bba4791fd522a3ad353c02ec5aa8",
"64be5c6aba65d594844ae78bb022e5bebe127fd6b6ffa5a13703855ab63b624dcd1a363f99203f632ec386f3ea767fc992e8ed9686586aa27555a8599d5b808f",
"f78585505c4eaa54a8b5be70a61e735e0ff97af944ddb3001e35d86c4e2199d976104b6ae31750a36a726ed285064f5981b503889fef822fcdc2898dddb7889a",
"e4b5566033869572edfd87479a5bb73c80e8759b91232879d96b1dda36c012076ee5a2ed7ae2de63ef8406a06aea82c188031b560beafb583fb3de9e57952a7e",
"e1b3e7ed867f6c9484a2a97f7715f25e25294e992e41f6a7c161ffc2adc6daaeb7113102d5e6090287fe6ad94ce5d6b739c6ca240b05c76fb73f25dd024bf935",
"85fd085fdc12a080983df07bd7012b0d402a0f4043fcb2775adf0bad174f9b08d1676e476985785c0a5dcc41dbff6d95ef4d66a3fbdc4a74b82ba52da0512b74",
"aed8fa764b0fbff821e05233d2f7b0900ec44d826f95e93c343c1bc3ba5a24374b1d616e7e7aba453a0ada5e4fab5382409e0d42ce9c2bc7fb39a99c340c20f0",
"7ba3b2e297233522eeb343bd3ebcfd835a04007735e87f0ca300cbee6d416565162171581e4020ff4cf176450f1291ea2285cb9ebffe4c56660627685145051c",
"de748bcf89ec88084721e16b85f30adb1a6134d664b5843569babc5bbd1a15ca9b61803c901a4fef32965a1749c9f3a4e243e173939dc5a8dc495c671ab52145",
"aaf4d2bdf200a919706d9842dce16c98140d34bc433df320aba9bd429e549aa7a3397652a4d768277786cf993cde2338673ed2e6b66c961fefb82cd20c93338f",
"c408218968b788bf864f0997e6bc4c3dba68b276e2125a4843296052ff93bf5767b8cdce7131f0876430c1165fec6c4f47adaa4fd8bcfacef463b5d3d0fa61a0",
"76d2d819c92bce55fa8e092ab1bf9b9eab237a25267986cacf2b8ee14d214d730dc9a5aa2d7b596e86a1fd8fa0804c77402d2fcd45083688b218b1cdfa0dcbcb",
"72065ee4dd91c2d8509fa1fc28a37c7fc9fa7d5b3f8ad3d0d7a25626b57b1b44788d4caf806290425f9890a3a2a35a905ab4b37acfd0da6e4517b2525c9651e4",
"64475dfe7600d7171bea0b394e27c9b00d8e74dd1e416a79473682ad3dfdbb706631558055cfc8a40e07bd015a4540dcdea15883cbbf31412df1de1cd4152b91",
"12cd1674a4488a5d7c2b3160d2e2c4b58371bedad793418d6f19c6ee385d70b3e06739369d4df910edb0b0a54cbff43d54544cd37ab3a06cfa0a3ddac8b66c89",
"60756966479dedc6dd4bcff8ea7d1d4ce4d4af2e7b097e32e3763518441147cc12b3c0ee6d2ecabf1198cec92e86a3616fba4f4e872f5825330adbb4c1dee444",
"a7803bcb71bc1d0f4383dde1e0612e04f872b715ad30815c2249cf34abb8b024915cb2fc9f4e7cc4c8cfd45be2d5a91eab0941c7d270e2da4ca4a9f7ac68663a",
"b84ef6a7229a34a750d9a98ee2529871816b87fbe3bc45b45fa5ae82d5141540211165c3c5d7a7476ba5a4aa06d66476f0d9dc49a3f1ee72c3acabd498967414",
"fae4b6d8efc3f8c8e64d001dabec3a21f544e82714745251b2b4b393f2f43e0da3d403c64db95a2cb6e23ebb7b9e94cdd5ddac54f07c4a61bd3cb10aa6f93b49",
"34f7286605a122369540141ded79b8957255da2d4155abbf5a8dbb89c8eb7ede8eeef1daa46dc29d751d045dc3b1d658bb64b80ff8589eddb3824b13da235a6b",
"3b3b48434be27b9eababba43bf6b35f14b30f6a88dc2e750c358470d6b3aa3c18e47db4017fa55106d8252f016371a00f5f8b070b74ba5f23cffc5511c9f09f0",
"ba289ebd6562c48c3e10a8ad6ce02e73433d1e93d7c9279d4d60a7e879ee11f441a000f48ed9f7c4ed87a45136d7dccdca482109c78a51062b3ba4044ada2469",
"022939e2386c5a37049856c850a2bb10a13dfea4212b4c732a8840a9ffa5faf54875c5448816b2785a007da8a8d2bc7d71a54e4e6571f10b600cbdb25d13ede3",
"e6fec19d89ce8717b1a087024670fe026f6c7cbda11caef959bb2d351bf856f8055d1c0ebdaaa9d1b17886fc2c562b5e99642fc064710c0d3488a02b5ed7f6fd",
"94c96f02a8f576aca32ba61c2b206f907285d9299b83ac175c209a8d43d53bfe683dd1d83e7549cb906c28f59ab7c46f8751366a28c39dd5fe2693c9019666c8",
"31a0cd215ebd2cb61de5b9edc91e6195e31c59a5648d5c9f737e125b2605708f2e325ab3381c8dce1a3e958886f1ecdc60318f882cfe20a24191352e617b0f21",
"91ab504a522dce78779f4c6c6ba2e6b6db5565c76d3e7e7c920caf7f757ef9db7c8fcf10e57f03379ea9bf75eb59895d96e149800b6aae01db778bb90afbc989",
"d85cabc6bd5b1a01a5afd8c6734740da9fd1c1acc6db29bfc8a2e5b668b028b6b3154bfb8703fa3180251d589ad38040ceb707c4bad1b5343cb426b61eaa49c1",
"d62efbec2ca9c1f8bd66ce8b3f6a898cb3f7566ba6568c618ad1feb2b65b76c3ce1dd20f7395372faf28427f61c9278049cf0140df434f5633048c86b81e0399",
"7c8fdc6175439e2c3db15bafa7fb06143a6a23bc90f449e79deef73c3d492a671715c193b6fea9f036050b946069856b897e08c00768f5ee5ddcf70b7cd6d0e0",
"58602ee7468e6bc9df21bd51b23c005f72d6cb013f0a1b48cbec5eca299299f97f09f54a9a01483eaeb315a6478bad37ba47ca1347c7c8fc9e6695592c91d723",
"27f5b79ed256b050993d793496edf4807c1d85a7b0a67c9c4fa99860750b0ae66989670a8ffd7856d7ce411599e58c4d77b232a62bef64d15275be46a68235ff",
"3957a976b9f1887bf004a8dca942c92d2b37ea52600f25e0c9bc5707d0279c00c6e85a839b0d2d8eb59c51d94788ebe62474a791cadf52cccf20f5070b6573fc",
"eaa2376d55380bf772ecca9cb0aa4668c95c707162fa86d518c8ce0ca9bf7362b9f2a0adc3ff59922df921b94567e81e452f6c1a07fc817cebe99604b3505d38",
"c1e2c78b6b2734e2480ec550434cb5d613111adcc21d475545c3b1b7e6ff12444476e5c055132e2229dc0f807044bb919b1a5662dd38a9ee65e243a3911aed1a",
"8ab48713389dd0fcf9f965d3ce66b1e559a1f8c58741d67683cd971354f452e62d0207a65e436c5d5d8f8ee71c6abfe50e669004c302b31a7ea8311d4a916051",
"24ce0addaa4c65038bd1b1c0f1452a0b128777aabc94a29df2fd6c7e2f85f8ab9ac7eff516b0e0a825c84a24cfe492eaad0a6308e46dd42fe8333ab971bb30ca",
"5154f929ee03045b6b0c0004fa778edee1d139893267cc84825ad7b36c63de32798e4a166d24686561354f63b00709a1364b3c241de3febf0754045897467cd4",
"e74e907920fd87bd5ad636dd11085e50ee70459c443e1ce5809af2bc2eba39f9e6d7128e0e3712c316da06f4705d78a4838e28121d4344a2c79c5e0db307a677",
"bf91a22334bac20f3fd80663b3cd06c4e8802f30e6b59f90d3035cc9798a217ed5a31abbda7fa6842827bdf2a7a1c21f6fcfccbb54c6c52926f32da816269be1",
"d9d5c74be5121b0bd742f26bffb8c89f89171f3f934913492b0903c271bbe2b3395ef259669bef43b57f7fcc3027db01823f6baee66e4f9fead4d6726c741fce",
"50c8b8cf34cd879f80e2faab3230b0c0e1cc3e9dcadeb1b9d97ab923415dd9a1fe38addd5c11756c67990b256e95ad6d8f9fedce10bf1c90679cde0ecf1be347",
"0a386e7cd5dd9b77a035e09fe6fee2c8ce61b5383c87ea43205059c5e4cd4f4408319bb0a82360f6a58e6c9ce3f487c446063bf813bc6ba535e17fc1826cfc91",
"1f1459cb6b61cbac5f0efe8fc487538f42548987fcd56221cfa7beb22504769e792c45adfb1d6b3d60d7b749c8a75b0bdf14e8ea721b95dca538ca6e25711209",
"e58b3836b7d8fedbb50ca5725c6571e74c0785e97821dab8b6298c10e4c079d4a6cdf22f0fedb55032925c16748115f01a105e77e00cee3d07924dc0d8f90659",
"b929cc6505f020158672deda56d0db081a2ee34c00c1100029bdf8ea98034fa4bf3e8655ec697fe36f40553c5bb46801644a627d3342f4fc92b61f03290fb381",
"72d353994b49d3e03153929a1e4d4f188ee58ab9e72ee8e512f29bc773913819ce057ddd7002c0433ee0a16114e3d156dd2c4a7e80ee53378b8670f23e33ef56",
"c70ef9bfd775d408176737a0736d68517ce1aaad7e81a93c8c1ed967ea214f56c8a377b1763e676615b60f3988241eae6eab9685a5124929d28188f29eab06f7",
"c230f0802679cb33822ef8b3b21bf7a9a28942092901d7dac3760300831026cf354c9232df3e084d9903130c601f63c1f4a4a4b8106e468cd443bbe5a734f45f",
"6f43094cafb5ebf1f7a4937ec50f56a4c9da303cbb55ac1f27f1f1976cd96beda9464f0e7b9c54620b8a9fba983164b8be3578425a024f5fe199c36356b88972",
"3745273f4c38225db2337381871a0c6aafd3af9b018c88aa02025850a5dc3a42a1a3e03e56cbf1b0876d63a441f1d2856a39b8801eb5af325201c415d65e97fe",
"c50c44cca3ec3edaae779a7e179450ebdda2f97067c690aa6c5a4ac7c30139bb27c0df4db3220e63cb110d64f37ffe078db72653e2daacf93ae3f0a2d1a7eb2e",
"8aef263e385cbc61e19b28914243262af5afe8726af3ce39a79c27028cf3ecd3f8d2dfd9cfc9ad91b58f6f20778fd5f02894a3d91c7d57d1e4b866a7f364b6be",
"28696141de6e2d9bcb3235578a66166c1448d3e905a1b482d423be4bc5369bc8c74dae0acc9cc123e1d8ddce9f97917e8c019c552da32d39d2219b9abf0fa8c8",
"2fb9eb2085830181903a9dafe3db428ee15be7662224efd643371fb25646aee716e531eca69b2bdc8233f1a8081fa43da1500302975a77f42fa592136710e9dc",
"66f9a7143f7a3314a669bf2e24bbb35014261d639f495b6c9c1f104fe8e320aca60d4550d69d52edbd5a3cdeb4014ae65b1d87aa770b69ae5c15f4330b0b0ad8",
"f4c4dd1d594c3565e3e25ca43dad82f62abea4835ed4cd811bcd975e46279828d44d4c62c3679f1b7f7b9dd4571d7b49557347b8c5460cbdc1bef690fb2a08c0",
"8f1dc9649c3a84551f8f6e91cac68242a43b1f8f328ee92280257387fa7559aa6db12e4aeadc2d26099178749c6864b357f3f83b2fb3efa8d2a8db056bed6bcc",
"3139c1a7f97afd1675d460ebbc07f2728aa150df849624511ee04b743ba0a833092f18c12dc91b4dd243f333402f59fe28abdbbbae301e7b659c7a26d5c0f979",
"06f94a2996158a819fe34c40de3cf0379fd9fb85b3e363ba3926a0e7d960e3f4c2e0c70c7ce0ccb2a64fc29869f6e7ab12bd4d3f14fce943279027e785fb5c29",
"c29c399ef3eee8961e87565c1ce263925fc3d0ce267d13e48dd9e732ee67b0f69fad56401b0f10fcaac119201046cca28c5b14abdea3212ae65562f7f138db3d",
"4cec4c9df52eef05c3f6faaa9791bc7445937183224ecc37a1e58d0132d35617531d7e795f52af7b1eb9d147de1292d345fe341823f8e6bc1e5badca5c656108",
"898bfbae93b3e18d00697eab7d9704fa36ec339d076131cefdf30edbe8d9cc81c3a80b129659b163a323bab9793d4feed92d54dae966c77529764a09be88db45",
"ee9bd0469d3aaf4f14035be48a2c3b84d9b4b1fff1d945e1f1c1d38980a951be197b25fe22c731f20aeacc930ba9c4a1f4762227617ad350fdabb4e80273a0f4",
"3d4d3113300581cd96acbf091c3d0f3c310138cd6979e6026cde623e2dd1b24d4a8638bed1073344783ad0649cc6305ccec04beb49f31c633088a99b65130267",
"95c0591ad91f921ac7be6d9ce37e0663ed8011c1cfd6d0162a5572e94368bac02024485e6a39854aa46fe38e97d6c6b1947cd272d86b06bb5b2f78b9b68d559d",
"227b79ded368153bf46c0a3ca978bfdbef31f3024a5665842468490b0ff748ae04e7832ed4c9f49de9b1706709d623e5c8c15e3caecae8d5e433430ff72f20eb",
"5d34f3952f0105eef88ae8b64c6ce95ebfade0e02c69b08762a8712d2e4911ad3f941fc4034dc9b2e479fdbcd279b902faf5d838bb2e0c6495d372b5b7029813",
"7f939bf8353abce49e77f14f3750af20b7b03902e1a1e7fb6aaf76d0259cd401a83190f15640e74f3e6c5a90e839c7821f6474757f75c7bf9002084ddc7a62dc",
"062b61a2f9a33a71d7d0a06119644c70b0716a504de7e5e1be49bd7b86e7ed6817714f9f0fc313d06129597e9a2235ec8521de36f7290a90ccfc1ffa6d0aee29",
"f29e01eeae64311eb7f1c6422f946bf7bea36379523e7b2bbaba7d1d34a22d5ea5f1c5a09d5ce1fe682cced9a4798d1a05b46cd72dff5c1b355440b2a2d476bc",
"ec38cd3bbab3ef35d7cb6d5c914298351d8a9dc97fcee051a8a02f58e3ed6184d0b7810a5615411ab1b95209c3c810114fdeb22452084e77f3f847c6dbaafe16",
"c2aef5e0ca43e82641565b8cb943aa8ba53550caef793b6532fafad94b816082f0113a3ea2f63608ab40437ecc0f0229cb8fa224dcf1c478a67d9b64162b92d1",
"15f534efff7105cd1c254d074e27d5898b89313b7d366dc2d7d87113fa7d53aae13f6dba487ad8103d5e854c91fdb6e1e74b2ef6d1431769c30767dde067a35c",
"89acbca0b169897a0a2714c2df8c95b5b79cb69390142b7d6018bb3e3076b099b79a964152a9d912b1b86412b7e372e9cecad7f25d4cbab8a317be36492a67d7",
"e3c0739190ed849c9c962fd9dbb55e207e624fcac1eb417691515499eea8d8267b7e8f1287a63633af5011fde8c4ddf55bfdf722edf88831414f2cfaed59cb9a",
"8d6cf87c08380d2d1506eee46fd4222d21d8c04e585fbfd08269c98f702833a156326a0724656400ee09351d57b440175e2a5de93cc5f80db6daf83576cf75fa",
"da24bede383666d563eeed37f6319baf20d5c75d1635a6ba5ef4cfa1ac95487e96f8c08af600aab87c986ebad49fc70a58b4890b9c876e091016daf49e1d322e",
"f9d1d1b1e87ea7ae753a029750cc1cf3d0157d41805e245c5617bb934e732f0ae3180b78e05bfe76c7c3051e3e3ac78b9b50c05142657e1e03215d6ec7bfd0fc",
"11b7bc1668032048aa43343de476395e814bbbc223678db951a1b03a021efac948cfbe215f97fe9a72a2f6bc039e3956bfa417c1a9f10d6d7ba5d3d32ff323e5",
"b8d9000e4fc2b066edb91afee8e7eb0f24e3a201db8b6793c0608581e628ed0bcc4e5aa6787992a4bcc44e288093e63ee83abd0bc3ec6d0934a674a4da13838a",
"ce325e294f9b6719d6b61278276ae06a2564c03bb0b783fafe785bdf89c7d5acd83e78756d301b445699024eaeb77b54d477336ec2a4f332f2b3f88765ddb0c3",
"29acc30e9603ae2fccf90bf97e6cc463ebe28c1b2f9b4b765e70537c25c702a29dcbfbf14c99c54345ba2b51f17b77b5f15db92bbad8fa95c471f5d070a137cc",
"3379cbaae562a87b4c0425550ffdd6bfe1203f0d666cc7ea095be407a5dfe61ee91441cd5154b3e53b4f5fb31ad4c7a9ad5c7af4ae679aa51a54003a54ca6b2d",
"3095a349d245708c7cf550118703d7302c27b60af5d4e67fc978f8a4e60953c7a04f92fcf41aee64321ccb707a895851552b1e37b00bc5e6b72fa5bcef9e3fff",
"07262d738b09321f4dbccec4bb26f48cb0f0ed246ce0b31b9a6e7bc683049f1f3e5545f28ce932dd985c5ab0f43bd6de0770560af329065ed2e49d34624c2cbb",
"b6405eca8ee3316c87061cc6ec18dba53e6c250c63ba1f3bae9e55dd3498036af08cd272aa24d713c6020d77ab2f3919af1a32f307420618ab97e73953994fb4",
"7ee682f63148ee45f6e5315da81e5c6e557c2c34641fc509c7a5701088c38a74756168e2cd8d351e88fd1a451f360a01f5b2580f9b5a2e8cfc138f3dd59a3ffc",
"1d263c179d6b268f6fa016f3a4f29e943891125ed8593c81256059f5a7b44af2dcb2030d175c00e62ecaf7ee96682aa07ab20a611024a28532b1c25b86657902",
"106d132cbdb4cd2597812846e2bc1bf732fec5f0a5f65dbb39ec4e6dc64ab2ce6d24630d0f15a805c3540025d84afa98e36703c3dbee713e72dde8465bc1be7e",
"0e79968226650667a8d862ea8da4891af56a4e3a8b6d1750e394f0dea76d640d85077bcec2cc86886e506751b4f6a5838f7f0b5fef765d9dc90dcdcbaf079f08",
"521156a82ab0c4e566e5844d5e31ad9aaf144bbd5a464fdca34dbd5717e8ff711d3ffebbfa085d67fe996a34f6d3e4e60b1396bf4b1610c263bdbb834d560816",
"1aba88befc55bc25efbce02db8b9933e46f57661baeabeb21cc2574d2a518a3cba5dc5a38e49713440b25f9c744e75f6b85c9d8f4681f676160f6105357b8406",
"5a9949fcb2c473cda968ac1b5d08566dc2d816d960f57e63b898fa701cf8ebd3f59b124d95bfbbedc5f1cf0e17d5eaed0c02c50b69d8a402cabcca4433b51fd4",
"b0cead09807c672af2eb2b0f06dde46cf5370e15a4096b1a7d7cbb36ec31c205fbefca00b7a4162fa89fb4fb3eb78d79770c23f44e7206664ce3cd931c291e5d",
"bb6664931ec97044e45b2ae420ae1c551a8874bc937d08e969399c3964ebdba8346cdd5d09caafe4c28ba7ec788191ceca65ddd6f95f18583e040d0f30d0364d",
"65bc770a5faa3792369803683e844b0be7ee96f29f6d6a35568006bd5590f9a4ef639b7a8061c7b0424b66b60ac34af3119905f33a9d8c3ae18382ca9b689900",
"ea9b4dca333336aaf839a45c6eaa48b8cb4c7ddabffea4f643d6357ea6628a480a5b45f2b052c1b07d1fedca918b6f1139d80f74c24510dcbaa4be70eacc1b06",
"e6342fb4a780ad975d0e24bce149989b91d360557e87994f6b457b895575cc02d0c15bad3ce7577f4c63927ff13f3e381ff7e72bdbe745324844a9d27e3f1c01",
"3e209c9b33e8e461178ab46b1c64b49a07fb745f1c8bc95fbfb94c6b87c69516651b264ef980937fad41238b91ddc011a5dd777c7efd4494b4b6ecd3a9c22ac0",
"fd6a3d5b1875d80486d6e69694a56dbb04a99a4d051f15db2689776ba1c4882e6d462a603b7015dc9f4b7450f05394303b8652cfb404a266962c41bae6e18a94",
"951e27517e6bad9e4195fc8671dee3e7e9be69cee1422cb9fecfce0dba875f7b310b93ee3a3d558f941f635f668ff832d2c1d033c5e2f0997e4c66f147344e02",
"8eba2f874f1ae84041903c7c4253c82292530fc8509550bfdc34c95c7e2889d5650b0ad8cb988e5c4894cb87fbfbb19612ea93ccc4c5cad17158b9763464b492",
"16f712eaa1b7c6354719a8e7dbdfaf55e4063a4d277d947550019b38dfb564830911057d50506136e2394c3b28945cc964967d54e3000c2181626cfb9b73efd2",
"c39639e7d5c7fb8cdd0fd3e6a52096039437122f21c78f1679cea9d78a734c56ecbeb28654b4f18e342c331f6f7229ec4b4bc281b2d80a6eb50043f31796c88c",
"72d081af99f8a173dcc9a0ac4eb3557405639a29084b54a40172912a2f8a395129d5536f0918e902f9e8fa6000995f4168ddc5f893011be6a0dbc9b8a1a3f5bb",
"c11aa81e5efd24d5fc27ee586cfd8847fbb0e27601ccece5ecca0198e3c7765393bb74457c7e7a27eb9170350e1fb53857177506be3e762cc0f14d8c3afe9077",
"c28f2150b452e6c0c424bcde6f8d72007f9310fed7f2f87de0dbb64f4479d6c1441ba66f44b2accee61609177ed340128b407ecec7c64bbe50d63d22d8627727",
"f63d88122877ec30b8c8b00d22e89000a966426112bd44166e2f525b769ccbe9b286d437a0129130dde1a86c43e04bedb594e671d98283afe64ce331de9828fd",
"348b0532880b88a6614a8d7408c3f913357fbb60e995c60205be9139e74998aede7f4581e42f6b52698f7fa1219708c14498067fd1e09502de83a77dd281150c",
"5133dc8bef725359dff59792d85eaf75b7e1dcd1978b01c35b1b85fcebc63388ad99a17b6346a217dc1a9622ebd122ecf6913c4d31a6b52a695b86af00d741a0",
"2753c4c0e98ecad806e88780ec27fccd0f5c1ab547f9e4bf1659d192c23aa2cc971b58b6802580baef8adc3b776ef7086b2545c2987f348ee3719cdef258c403",
"b1663573ce4b9d8caefc865012f3e39714b9898a5da6ce17c25a6a47931a9ddb9bbe98adaa553beed436e89578455416c2a52a525cf2862b8d1d49a2531b7391",
"64f58bd6bfc856f5e873b2a2956ea0eda0d6db0da39c8c7fc67c9f9feefcff3072cdf9e6ea37f69a44f0c61aa0da3693c2db5b54960c0281a088151db42b11e8",
"0764c7be28125d9065c4b98a69d60aede703547c66a12e17e1c618994132f5ef82482c1e3fe3146cc65376cc109f0138ed9a80e49f1f3c7d610d2f2432f20605",
"f748784398a2ff03ebeb07e155e66116a839741a336e32da71ec696001f0ad1b25cd48c69cfca7265eca1dd71904a0ce748ac4124f3571076dfa7116a9cf00e9",
"3f0dbc0186bceb6b785ba78d2a2a013c910be157bdaffae81bb6663b1a73722f7f1228795f3ecada87cf6ef0078474af73f31eca0cc200ed975b6893f761cb6d",
"d4762cd4599876ca75b2b8fe249944dbd27ace741fdab93616cbc6e425460feb51d4e7adcc38180e7fc47c89024a7f56191adb878dfde4ead62223f5a2610efe",
"cd36b3d5b4c91b90fcbba79513cfee1907d8645a162afd0cd4cf4192d4a5f4c892183a8eacdb2b6b6a9d9aa8c11ac1b261b380dbee24ca468f1bfd043c58eefe",
"98593452281661a53c48a9d8cd790826c1a1ce567738053d0bee4a91a3d5bd92eefdbabebe3204f2031ca5f781bda99ef5d8ae56e5b04a9e1ecd21b0eb05d3e1",
"771f57dd2775ccdab55921d3e8e30ccf484d61fe1c1b9c2ae819d0fb2a12fab9be70c4a7a138da84e8280435daade5bbe66af0836a154f817fb17f3397e725a3",
"c60897c6f828e21f16fbb5f15b323f87b6c8955eabf1d38061f707f608abdd993fac3070633e286cf8339ce295dd352df4b4b40b2f29da1dd50b3a05d079e6bb",
"8210cd2c2d3b135c2cf07fa0d1433cd771f325d075c6469d9c7f1ba0943cd4ab09808cabf4acb9ce5bb88b498929b4b847f681ad2c490d042db2aec94214b06b",
"1d4edfffd8fd80f7e4107840fa3aa31e32598491e4af7013c197a65b7f36dd3ac4b478456111cd4309d9243510782fa31b7c4c95fa951520d020eb7e5c36e4ef",
"af8e6e91fab46ce4873e1a50a8ef448cc29121f7f74deef34a71ef89cc00d9274bc6c2454bbb3230d8b2ec94c62b1dec85f3593bfa30ea6f7a44d7c09465a253",
"29fd384ed4906f2d13aa9fe7af905990938bed807f1832454a372ab412eea1f5625a1fcc9ac8343b7c67c5aba6e0b1cc4644654913692c6b39eb9187ceacd3ec",
"a268c7885d9874a51c44dffed8ea53e94f78456e0b2ed99ff5a3924760813826d960a15edbedbb5de5226ba4b074e71b05c55b9756bb79e55c02754c2c7b6c8a",
"0cf8545488d56a86817cd7ecb10f7116b7ea530a45b6ea497b6c72c997e09e3d0da8698f46bb006fc977c2cd3d1177463ac9057fdd1662c85d0c126443c10473",
"b39614268fdd8781515e2cfebf89b4d5402bab10c226e6344e6b9ae000fb0d6c79cb2f3ec80e80eaeb1980d2f8698916bd2e9f747236655116649cd3ca23a837",
"74bef092fc6f1e5dba3663a3fb003b2a5ba257496536d99f62b9d73f8f9eb3ce9ff3eec709eb883655ec9eb896b9128f2afc89cf7d1ab58a72f4a3bf034d2b4a",
"3a988d38d75611f3ef38b8774980b33e573b6c57bee0469ba5eed9b44f29945e7347967fba2c162e1c3be7f310f2f75ee2381e7bfd6b3f0baea8d95dfb1dafb1",
"58aedfce6f67ddc85a28c992f1c0bd0969f041e66f1ee88020a125cbfcfebcd61709c9c4eba192c15e69f020d462486019fa8dea0cd7a42921a19d2fe546d43d",
"9347bd291473e6b4e368437b8e561e065f649a6d8ada479ad09b1999a8f26b91cf6120fd3bfe014e83f23acfa4c0ad7b3712b2c3c0733270663112ccd9285cd9",
"b32163e7c5dbb5f51fdc11d2eac875efbbcb7e7699090a7e7ff8a8d50795af5d74d9ff98543ef8cdf89ac13d0485278756e0ef00c817745661e1d59fe38e7537",
"1085d78307b1c4b008c57a2e7e5b234658a0a82e4ff1e4aaac72b312fda0fe27d233bc5b10e9cc17fdc7697b540c7d95eb215a19a1a0e20e1abfa126efd568c7",
"4e5c734c7dde011d83eac2b7347b373594f92d7091b9ca34cb9c6f39bdf5a8d2f134379e16d822f6522170ccf2ddd55c84b9e6c64fc927ac4cf8dfb2a17701f2",
"695d83bd990a1117b3d0ce06cc888027d12a054c2677fd82f0d4fbfc93575523e7991a5e35a3752e9b70ce62992e268a877744cdd435f5f130869c9a2074b338",
"a6213743568e3b3158b9184301f3690847554c68457cb40fc9a4b8cfd8d4a118c301a07737aeda0f929c68913c5f51c80394f53bff1c3e83b2e40ca97eba9e15",
"d444bfa2362a96df213d070e33fa841f51334e4e76866b8139e8af3bb3398be2dfaddcbc56b9146de9f68118dc5829e74b0c28d7711907b121f9161cb92b69a9",
"142709d62e28fcccd0af97fad0f8465b971e82201dc51070faa0372aa43e92484be1c1e73ba10906d5d1853db6a4106e0a7bf9800d373d6dee2d46d62ef2a461",
}