replace blake2b implementation (#3481)

* replace blake2b implementation
replace the blake2b-simd with the golang/x/crypto implementation

```
name        old time/op    new time/op     delta
Size64-8       715ns ±13%      614ns ± 3%    ~     (p=0.084 n=6+6)
Size128-8      612ns ± 5%      634ns ± 8%    ~     (p=0.084 n=6+6)
Size1K-8      2.18µs ± 5%     2.09µs ± 7%    ~     (p=0.084 n=6+6)
Size8K-8      13.1µs ± 2%     13.4µs ± 3%    ~     (p=0.084 n=6+6)
Size32K-8     48.5µs ± 1%     49.5µs ± 3%    ~     (p=0.775 n=6+6)
Size128K-8     199µs ± 0%      198µs ± 3%    ~     (p=0.468 n=6+6)

name        old speed      new speed       delta
Size64-8    92.6MB/s ±11%  104.2MB/s ± 3%    ~     (p=0.139 n=6+6)
Size128-8    208MB/s ± 6%    202MB/s ± 8%    ~     (p=0.102 n=6+6)
Size1K-8     466MB/s ± 7%    492MB/s ± 7%    ~     (p=0.139 n=6+6)
Size8K-8     621MB/s ± 2%    610MB/s ± 3%    ~     (p=0.102 n=6+6)
Size32K-8    672MB/s ± 2%    669MB/s ± 1%    ~     (p=0.818 n=6+6)
Size128K-8   657MB/s ± 1%    672MB/s ± 0%  +2.28%  (p=0.002 n=6+6)

name        old time/op   new time/op   delta
Size64-4      334ns ± 1%    243ns ± 0%  -27.14%  (p=0.029 n=4+4)
Size128-4     296ns ± 1%    242ns ± 0%  -18.21%  (p=0.029 n=4+4)
Size1K-4     1.44µs ± 0%   1.28µs ± 0%  -10.83%  (p=0.029 n=4+4)
Size8K-4     10.0µs ± 0%    9.4µs ± 0%   -6.23%  (p=0.029 n=4+4)
Size32K-4    39.8µs ± 1%   37.3µs ± 0%   -6.31%  (p=0.029 n=4+4)
Size128K-4    162µs ± 3%    149µs ± 0%   -7.72%  (p=0.029 n=4+4)

name        old speed     new speed     delta
Size64-4    192MB/s ± 1%  263MB/s ± 0%  +37.24%  (p=0.029 n=4+4)
Size128-4   431MB/s ± 0%  526MB/s ± 0%  +22.04%  (p=0.029 n=4+4)
Size1K-4    713MB/s ± 0%  800MB/s ± 0%  +12.17%  (p=0.029 n=4+4)
Size8K-4    815MB/s ± 0%  869MB/s ± 0%   +6.64%  (p=0.029 n=4+4)
Size32K-4   823MB/s ± 1%  878MB/s ± 0%   +6.72%  (p=0.029 n=4+4)
Size128K-4  810MB/s ± 3%  877MB/s ± 0%   +8.23%  (p=0.029 n=4+4)
```
See: https://go-review.googlesource.com/#/c/34319/
This commit is contained in:
Andreas Auernhammer 2016-12-21 23:20:01 +01:00 committed by Harshavardhana
parent 15b4c49621
commit 1ac36a95aa
24 changed files with 1698 additions and 4501 deletions

View file

@ -24,7 +24,7 @@ import (
"sync"
"github.com/klauspost/reedsolomon"
"github.com/minio/blake2b-simd"
"golang.org/x/crypto/blake2b"
)
// newHashWriters - inititialize a slice of hashes for the disk count.
@ -40,11 +40,19 @@ func newHashWriters(diskCount int, algo string) []hash.Hash {
func newHash(algo string) hash.Hash {
switch algo {
case "blake2b":
return blake2b.New512()
// ignore the error, because New512 without a key never fails
// New512 only returns a non-nil error, if the length of the passed
// key > 64 bytes - but we use blake2b as hash fucntion (no key)
h, _ := blake2b.New512(nil)
return h
// Add new hashes here.
default:
// Default to blake2b.
return blake2b.New512()
// ignore the error, because New512 without a key never fails
// New512 only returns a non-nil error, if the length of the passed
// key > 64 bytes - but we use blake2b as hash fucntion (no key)
h, _ := blake2b.New512(nil)
return h
}
}

View file

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -1,144 +0,0 @@
BLAKE2b-SIMD
============
Pure Go implementation of BLAKE2b using SIMD optimizations.
Introduction
------------
This package was initially based on the pure go [BLAKE2b](https://github.com/dchest/blake2b) implementation of Dmitry Chestnykh and merged with the (`cgo` dependent) AVX optimized [BLAKE2](https://github.com/codahale/blake2) implementation (which in turn is based on the [official implementation](https://github.com/BLAKE2/BLAKE2). It does so by using [Go's Assembler](https://golang.org/doc/asm) for amd64 architectures with a golang only fallback for other architectures.
In addition to AVX there is also support for AVX2 as well as SSE. Best performance is obtained with AVX2 which gives roughly a **4X** performance increase approaching hashing speeds of **1GB/sec** on a single core.
Benchmarks
----------
This is a summary of the performance improvements. Full details are shown below.
| Technology | 128K |
| ---------- |:-----:|
| AVX2 | 3.94x |
| AVX | 3.28x |
| SSE | 2.85x |
asm2plan9s
----------
In order to be able to work more easily with AVX2/AVX instructions, a separate tool was developed to convert AVX2/AVX instructions into the corresponding BYTE sequence as accepted by Go assembly. See [asm2plan9s](https://github.com/minio/asm2plan9s) for more information.
bt2sum
------
[bt2sum](https://github.com/s3git/bt2sum) is a utility that takes advantages of the BLAKE2b SIMD optimizations to compute check sums using the BLAKE2 Tree hashing mode in so called 'unlimited fanout' mode.
Technical details
-----------------
BLAKE2b is a hashing algorithm that operates on 64-bit integer values. The AVX2 version uses the 256-bit wide YMM registers in order to essentially process four operations in parallel. AVX and SSE operate on 128-bit values simultaneously (two operations in parallel). Below are excerpts from `compressAvx2_amd64.s`, `compressAvx_amd64.s`, and `compress_generic.go` respectively.
```
VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */
```
```
VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */
VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */
```
```
v0 += v4
v1 += v5
v2 += v6
v3 += v7
```
Detailed benchmarks
-------------------
Example performance metrics were generated on Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz - 6 physical cores, 12 logical cores running Ubuntu GNU/Linux with kernel version 4.4.0-24-generic (vanilla with no optimizations).
### AVX2
```
$ benchcmp go.txt avx2.txt
benchmark old ns/op new ns/op delta
BenchmarkHash64-12 1481 849 -42.67%
BenchmarkHash128-12 1428 746 -47.76%
BenchmarkHash1K-12 6379 2227 -65.09%
BenchmarkHash8K-12 37219 11714 -68.53%
BenchmarkHash32K-12 140716 35935 -74.46%
BenchmarkHash128K-12 561656 142634 -74.60%
benchmark old MB/s new MB/s speedup
BenchmarkHash64-12 43.20 75.37 1.74x
BenchmarkHash128-12 89.64 171.35 1.91x
BenchmarkHash1K-12 160.52 459.69 2.86x
BenchmarkHash8K-12 220.10 699.32 3.18x
BenchmarkHash32K-12 232.87 911.85 3.92x
BenchmarkHash128K-12 233.37 918.93 3.94x
```
### AVX2: Comparison to other hashing techniques
```
$ go test -bench=Comparison
BenchmarkComparisonMD5-12 1000 1726121 ns/op 607.48 MB/s
BenchmarkComparisonSHA1-12 500 2005164 ns/op 522.94 MB/s
BenchmarkComparisonSHA256-12 300 5531036 ns/op 189.58 MB/s
BenchmarkComparisonSHA512-12 500 3423030 ns/op 306.33 MB/s
BenchmarkComparisonBlake2B-12 1000 1232690 ns/op 850.64 MB/s
```
Benchmarks below were generated on a MacBook Pro with a 2.7 GHz Intel Core i7.
### AVX
```
$ benchcmp go.txt avx.txt
benchmark old ns/op new ns/op delta
BenchmarkHash64-8 813 458 -43.67%
BenchmarkHash128-8 766 401 -47.65%
BenchmarkHash1K-8 4881 1763 -63.88%
BenchmarkHash8K-8 36127 12273 -66.03%
BenchmarkHash32K-8 140582 43155 -69.30%
BenchmarkHash128K-8 567850 173246 -69.49%
benchmark old MB/s new MB/s speedup
BenchmarkHash64-8 78.63 139.57 1.78x
BenchmarkHash128-8 166.98 318.73 1.91x
BenchmarkHash1K-8 209.76 580.68 2.77x
BenchmarkHash8K-8 226.76 667.46 2.94x
BenchmarkHash32K-8 233.09 759.29 3.26x
BenchmarkHash128K-8 230.82 756.56 3.28x
```
### SSE
```
$ benchcmp go.txt sse.txt
benchmark old ns/op new ns/op delta
BenchmarkHash64-8 813 478 -41.21%
BenchmarkHash128-8 766 411 -46.34%
BenchmarkHash1K-8 4881 1870 -61.69%
BenchmarkHash8K-8 36127 12427 -65.60%
BenchmarkHash32K-8 140582 49512 -64.78%
BenchmarkHash128K-8 567850 199040 -64.95%
benchmark old MB/s new MB/s speedup
BenchmarkHash64-8 78.63 133.78 1.70x
BenchmarkHash128-8 166.98 311.23 1.86x
BenchmarkHash1K-8 209.76 547.37 2.61x
BenchmarkHash8K-8 226.76 659.20 2.91x
BenchmarkHash32K-8 233.09 661.81 2.84x
BenchmarkHash128K-8 230.82 658.52 2.85x
```
License
-------
Released under the Apache License v2.0. You can find the complete text in the file LICENSE.
Contributing
------------
Contributions are welcome, please send PRs for any enhancements.

View file

@ -1,301 +0,0 @@
// Written in 2012 by Dmitry Chestnykh.
//
// To the extent possible under law, the author have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// http://creativecommons.org/publicdomain/zero/1.0/
// Package blake2b implements BLAKE2b cryptographic hash function.
package blake2b
import (
"encoding/binary"
"errors"
"hash"
)
const (
BlockSize = 128 // block size of algorithm
Size = 64 // maximum digest size
SaltSize = 16 // maximum salt size
PersonSize = 16 // maximum personalization string size
KeySize = 64 // maximum size of key
)
type digest struct {
h [8]uint64 // current chain value
t [2]uint64 // message bytes counter
f [2]uint64 // finalization flags
x [BlockSize]byte // buffer for data not yet compressed
nx int // number of bytes in buffer
ih [8]uint64 // initial chain value (after config)
paddedKey [BlockSize]byte // copy of key, padded with zeros
isKeyed bool // indicates whether hash was keyed
size uint8 // digest size in bytes
isLastNode bool // indicates processing of the last node in tree hashing
}
// Initialization values.
var iv = [8]uint64{
0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f,
0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
}
// Config is used to configure hash function parameters and keying.
// All parameters are optional.
type Config struct {
Size uint8 // digest size (if zero, default size of 64 bytes is used)
Key []byte // key for prefix-MAC
Salt []byte // salt (if < 16 bytes, padded with zeros)
Person []byte // personalization (if < 16 bytes, padded with zeros)
Tree *Tree // parameters for tree hashing
}
// Tree represents parameters for tree hashing.
type Tree struct {
Fanout uint8 // fanout
MaxDepth uint8 // maximal depth
LeafSize uint32 // leaf maximal byte length (0 for unlimited)
NodeOffset uint64 // node offset (0 for first, leftmost or leaf)
NodeDepth uint8 // node depth (0 for leaves)
InnerHashSize uint8 // inner hash byte length
IsLastNode bool // indicates processing of the last node of layer
}
var (
defaultConfig = &Config{Size: Size}
config256 = &Config{Size: 32}
)
func verifyConfig(c *Config) error {
if c.Size > Size {
return errors.New("digest size is too large")
}
if len(c.Key) > KeySize {
return errors.New("key is too large")
}
if len(c.Salt) > SaltSize {
// Smaller salt is okay: it will be padded with zeros.
return errors.New("salt is too large")
}
if len(c.Person) > PersonSize {
// Smaller personalization is okay: it will be padded with zeros.
return errors.New("personalization is too large")
}
if c.Tree != nil {
if c.Tree.Fanout == 1 {
return errors.New("fanout of 1 is not allowed in tree mode")
}
if c.Tree.MaxDepth < 2 {
return errors.New("incorrect tree depth")
}
if c.Tree.InnerHashSize < 1 || c.Tree.InnerHashSize > Size {
return errors.New("incorrect tree inner hash size")
}
}
return nil
}
// New returns a new hash.Hash configured with the given Config.
// Config can be nil, in which case the default one is used, calculating 64-byte digest.
// Returns non-nil error if Config contains invalid parameters.
func New(c *Config) (hash.Hash, error) {
if c == nil {
c = defaultConfig
} else {
if c.Size == 0 {
// Set default size if it's zero.
c.Size = Size
}
if err := verifyConfig(c); err != nil {
return nil, err
}
}
d := new(digest)
d.initialize(c)
return d, nil
}
// initialize initializes digest with the given
// config, which must be non-nil and verified.
func (d *digest) initialize(c *Config) {
// Create parameter block.
var p [BlockSize]byte
p[0] = c.Size
p[1] = uint8(len(c.Key))
if c.Salt != nil {
copy(p[32:], c.Salt)
}
if c.Person != nil {
copy(p[48:], c.Person)
}
if c.Tree != nil {
p[2] = c.Tree.Fanout
p[3] = c.Tree.MaxDepth
binary.LittleEndian.PutUint32(p[4:], c.Tree.LeafSize)
binary.LittleEndian.PutUint64(p[8:], c.Tree.NodeOffset)
p[16] = c.Tree.NodeDepth
p[17] = c.Tree.InnerHashSize
} else {
p[2] = 1
p[3] = 1
}
// Initialize.
d.size = c.Size
for i := 0; i < 8; i++ {
d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(p[i*8:])
}
if c.Tree != nil && c.Tree.IsLastNode {
d.isLastNode = true
}
// Process key.
if c.Key != nil {
copy(d.paddedKey[:], c.Key)
d.Write(d.paddedKey[:])
d.isKeyed = true
}
// Save a copy of initialized state.
copy(d.ih[:], d.h[:])
}
// New512 returns a new hash.Hash computing the BLAKE2b 64-byte checksum.
func New512() hash.Hash {
d := new(digest)
d.initialize(defaultConfig)
return d
}
// New256 returns a new hash.Hash computing the BLAKE2b 32-byte checksum.
func New256() hash.Hash {
d := new(digest)
d.initialize(config256)
return d
}
// NewMAC returns a new hash.Hash computing BLAKE2b prefix-
// Message Authentication Code of the given size in bytes
// (up to 64) with the given key (up to 64 bytes in length).
func NewMAC(outBytes uint8, key []byte) hash.Hash {
d, err := New(&Config{Size: outBytes, Key: key})
if err != nil {
panic(err.Error())
}
return d
}
// Reset resets the state of digest to the initial state
// after configuration and keying.
func (d *digest) Reset() {
copy(d.h[:], d.ih[:])
d.t[0] = 0
d.t[1] = 0
d.f[0] = 0
d.f[1] = 0
d.nx = 0
if d.isKeyed {
d.Write(d.paddedKey[:])
}
}
// Size returns the digest size in bytes.
func (d *digest) Size() int { return int(d.size) }
// BlockSize returns the algorithm block size in bytes.
func (d *digest) BlockSize() int { return BlockSize }
func (d *digest) Write(p []byte) (nn int, err error) {
nn = len(p)
left := BlockSize - d.nx
if len(p) > left {
// Process buffer.
copy(d.x[d.nx:], p[:left])
p = p[left:]
compress(d, d.x[:])
d.nx = 0
}
// Process full blocks except for the last one.
if len(p) > BlockSize {
n := len(p) &^ (BlockSize - 1)
if n == len(p) {
n -= BlockSize
}
compress(d, p[:n])
p = p[n:]
}
// Fill buffer.
d.nx += copy(d.x[d.nx:], p)
return
}
// Sum returns the calculated checksum.
func (d *digest) Sum(in []byte) []byte {
// Make a copy of d so that caller can keep writing and summing.
d0 := *d
hash := d0.checkSum()
return append(in, hash[:d0.size]...)
}
func (d *digest) checkSum() [Size]byte {
// Do not create unnecessary copies of the key.
if d.isKeyed {
for i := 0; i < len(d.paddedKey); i++ {
d.paddedKey[i] = 0
}
}
dec := BlockSize - uint64(d.nx)
if d.t[0] < dec {
d.t[1]--
}
d.t[0] -= dec
// Pad buffer with zeros.
for i := d.nx; i < len(d.x); i++ {
d.x[i] = 0
}
// Set last block flag.
d.f[0] = 0xffffffffffffffff
if d.isLastNode {
d.f[1] = 0xffffffffffffffff
}
// Compress last block.
compress(d, d.x[:])
var out [Size]byte
j := 0
for _, s := range d.h[:(d.size-1)/8+1] {
out[j+0] = byte(s >> 0)
out[j+1] = byte(s >> 8)
out[j+2] = byte(s >> 16)
out[j+3] = byte(s >> 24)
out[j+4] = byte(s >> 32)
out[j+5] = byte(s >> 40)
out[j+6] = byte(s >> 48)
out[j+7] = byte(s >> 56)
j += 8
}
return out
}
// Sum512 returns a 64-byte BLAKE2b hash of data.
func Sum512(data []byte) [64]byte {
var d digest
d.initialize(defaultConfig)
d.Write(data)
return d.checkSum()
}
// Sum256 returns a 32-byte BLAKE2b hash of data.
func Sum256(data []byte) (out [32]byte) {
var d digest
d.initialize(config256)
d.Write(data)
sum := d.checkSum()
copy(out[:], sum[:32])
return
}

View file

@ -1,47 +0,0 @@
//+build !noasm
//+build !appengine
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
//go:noescape
func compressAVX2Loop(p []uint8, in, iv, t, f, shffle, out []uint64)
func compressAVX2(d *digest, p []uint8) {
var (
in [8]uint64
out [8]uint64
shffle [8]uint64
)
// vector for PSHUFB instruction
shffle[0] = 0x0201000706050403
shffle[1] = 0x0a09080f0e0d0c0b
shffle[2] = 0x0201000706050403
shffle[3] = 0x0a09080f0e0d0c0b
shffle[4] = 0x0100070605040302
shffle[5] = 0x09080f0e0d0c0b0a
shffle[6] = 0x0100070605040302
shffle[7] = 0x09080f0e0d0c0b0a
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
compressAVX2Loop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
}

View file

@ -1,671 +0,0 @@
//+build !noasm !appengine
//
// Minio Cloud Storage, (C) 2016 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
// Based on AVX2 implementation from https://github.com/sneves/blake2-avx2/blob/master/blake2b-common.h
//
// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent
//
// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as:
// #define ROUND(r) \
// LOAD_MSG_ ##r ##_1(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_2(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
// LOAD_MSG_ ##r ##_3(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_4(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
//
// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go
//
// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly)
//
// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and
// rounds 2 & 12 are identical)
//
#define G1 \
\ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc4 \ // VPADDQ YMM0,YMM0,YMM4 /* v0 += m[0], v1 += m[2], v2 += m[4], v3 += m[6] */
BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc1 \ // VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */
BYTE $0xc5; BYTE $0xe5; BYTE $0xef; BYTE $0xd8 \ // VPXOR YMM3,YMM3,YMM0 /* v12 ^= v0, v13 ^= v1, v14 ^= v2, v15 ^= v3 */
BYTE $0xc5; BYTE $0xfd; BYTE $0x70; BYTE $0xdb; BYTE $0xb1 \ // VPSHUFD YMM3,YMM3,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = */
BYTE $0xc5; BYTE $0xed; BYTE $0xd4; BYTE $0xd3 \ // VPADDQ YMM2,YMM2,YMM3 /* v8 += v12, v9 += v13, v10 += v14, v11 += v15 */
BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xca \ // VPXOR YMM1,YMM1,YMM2 /* v4 ^= v8, v5 ^= v9, v6 ^= v10, v7 ^= v11 */
BYTE $0xc4; BYTE $0xe2; BYTE $0x75; BYTE $0x00; BYTE $0xce // VPSHUFB YMM1,YMM1,YMM6 /* v4 = v4<<(64-24) | v4>>24, ..., ..., v7 = v7<<(64-24) | v7>>24 */
#define G2 \
BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc5 \ // VPADDQ YMM0,YMM0,YMM5 /* v0 += m[1], v1 += m[3], v2 += m[5], v3 += m[7] */
BYTE $0xc5; BYTE $0xfd; BYTE $0xd4; BYTE $0xc1 \ // VPADDQ YMM0,YMM0,YMM1 /* v0 += v4, v1 += v5, v2 += v6, v3 += v7 */
BYTE $0xc5; BYTE $0xe5; BYTE $0xef; BYTE $0xd8 \ // VPXOR YMM3,YMM3,YMM0 /* v12 ^= v0, v13 ^= v1, v14 ^= v2, v15 ^= v3 */
BYTE $0xc4; BYTE $0xe2; BYTE $0x65; BYTE $0x00; BYTE $0xdf \ // VPSHUFB YMM3,YMM3,YMM7 /* v12 = v12<<(64-16) | v12>>16, ..., ..., v15 = v15<<(64-16) | v15>>16 */
BYTE $0xc5; BYTE $0xed; BYTE $0xd4; BYTE $0xd3 \ // VPADDQ YMM2,YMM2,YMM3 /* v8 += v12, v9 += v13, v10 += v14, v11 += v15 */
BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xca \ // VPXOR YMM1,YMM1,YMM2 /* v4 ^= v8, v5 ^= v9, v6 ^= v10, v7 ^= v11 */
BYTE $0xc5; BYTE $0x75; BYTE $0xd4; BYTE $0xf9 \ // VPADDQ YMM15,YMM1,YMM1 /* temp reg = reg*2 */
BYTE $0xc5; BYTE $0xf5; BYTE $0x73; BYTE $0xd1; BYTE $0x3f \ // VPSRLQ YMM1,YMM1,0x3f /* reg = reg>>63 */
BYTE $0xc4; BYTE $0xc1; BYTE $0x75; BYTE $0xef; BYTE $0xcf // VPXOR YMM1,YMM1,YMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */
#define DIAGONALIZE \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb \ // VPERMQ YMM3, YMM3, 0x93
BYTE $0x93 \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2 \ // VPERMQ YMM2, YMM2, 0x4e
BYTE $0x4e \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 \ // VPERMQ YMM1, YMM1, 0x39
BYTE $0x39 \
// DO NOT DELETE -- macro delimiter (previous line extended)
#define UNDIAGONALIZE \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb \ // VPERMQ YMM3, YMM3, 0x39
BYTE $0x39 \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2 \ // VPERMQ YMM2, YMM2, 0x4e
BYTE $0x4e \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 \ // VPERMQ YMM1, YMM1, 0x93
BYTE $0x93 \
// DO NOT DELETE -- macro delimiter (previous line extended)
#define LOAD_SHUFFLE \
MOVQ shffle+120(FP), SI \ // SI: &shuffle
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x36 \ // VMOVDQU YMM6, [rsi]
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x7e; BYTE $0x20 // VMOVDQU YMM7, 32[rsi]
// func compressAVX2Loop(compressSSE(p []uint8, in, iv, t, f, shffle, out []uint64)
TEXT ·compressAVX2Loop(SB), 7, $0
// REGISTER USE
// Y0 - Y3: v0 - v15
// Y4 - Y5: m[0] - m[7]
// Y6 - Y7: shuffle value
// Y8 - Y9: temp registers
// Y10 -Y13: copy of full message
// Y15: temp register
// Load digest
MOVQ in+24(FP), SI // SI: &in
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x06 // VMOVDQU YMM0, [rsi]
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x4e; BYTE $0x20 // VMOVDQU YMM1, 32[rsi]
// Already store digest into &out (so we can reload it later generically)
MOVQ out+144(FP), SI // SI: &out
BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x06 // VMOVDQU [rsi], YMM0
BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x4e; BYTE $0x20 // VMOVDQU 32[rsi], YMM1
// Initialize message pointer and loop counter
MOVQ message+0(FP), DX // DX: &p (message)
MOVQ message_len+8(FP), R8 // R8: len(message)
SHRQ $7, R8 // len(message) / 128
CMPQ R8, $0
JEQ complete
loop:
// Increment counter
MOVQ t+72(FP), SI // SI: &t
MOVQ 0(SI), R9 //
ADDQ $128, R9 // /* d.t[0] += BlockSize */
MOVQ R9, 0(SI) //
CMPQ R9, $128 // /* if d.t[0] < BlockSize { */
JGE noincr //
MOVQ 8(SI), R9 //
ADDQ $1, R9 // /* d.t[1]++ */
MOVQ R9, 8(SI) //
noincr: // /* } */
// Load initialization vector
MOVQ iv+48(FP), SI // SI: &iv
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x16 // VMOVDQU YMM2, [rsi]
BYTE $0xc5; BYTE $0xfe; BYTE $0x6f; BYTE $0x5e; BYTE $0x20 // VMOVDQU YMM3, 32[rsi]
MOVQ t+72(FP), SI // SI: &t
BYTE $0xc4; BYTE $0x63; BYTE $0x3d; BYTE $0x38; BYTE $0x06 // VINSERTI128 YMM8, YMM8, [rsi], 0 /* Y8 = t[0]+t[1] */
BYTE $0x00
MOVQ t+96(FP), SI // SI: &f
BYTE $0xc4; BYTE $0x63; BYTE $0x3d; BYTE $0x38; BYTE $0x06 // VINSERTI128 YMM8, YMM8, [rsi], 1 /* Y8 = t[0]+t[1]+f[0]+f[1] */
BYTE $0x01
BYTE $0xc4; BYTE $0xc1; BYTE $0x65; BYTE $0xef; BYTE $0xd8 // VPXOR YMM3,YMM3,YMM8 /* Y3 = Y3 ^ Y8 */
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x12 // VMOVDQU YMM10, [rdx] /* Y10 = m[0]+ m[1]+ m[2]+ m[3] */
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x5a; BYTE $0x20 // VMOVDQU YMM11, 32[rdx] /* Y11 = m[4]+ m[5]+ m[6]+ m[7] */
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x62; BYTE $0x40 // VMOVDQU YMM12, 64[rdx] /* Y12 = m[8]+ m[9]+m[10]+m[11] */
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x6a; BYTE $0x60 // VMOVDQU YMM13, 96[rdx] /* Y13 = m[12]+m[13]+m[14]+m[15] */
LOAD_SHUFFLE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6c; BYTE $0xe3 // VPUNPCKLQDQ YMM4, YMM10, YMM11 /* m[0], m[4], m[2], m[6] */
BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6d; BYTE $0xeb // VPUNPCKHQDQ YMM5, YMM10, YMM11 /* m[1], m[5], m[3], m[7] */
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6c; BYTE $0xe5 // VPUNPCKLQDQ YMM4, YMM12, YMM13 /* m[8], m[12], m[10], m[14] */
BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6d; BYTE $0xed // VPUNPCKHQDQ YMM5, YMM12, YMM13 /* m[9], m[13], m[11], m[15] */
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 2
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 /* m[4], ____, ____, m[14] */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x03 /* m[14], m[4], ____, ____ */ /* xxxx 0011 = 0x03 */
BYTE $0x03
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM12, YMM13 /* m[9], m[13], ____, ____ */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[9], m[13], ____, ____ */ /* 0010 0000 = 0x20 */
BYTE $0x20
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x02 /* m[10], m[8], ____, ____ */ /* xxxx 0010 = 0x02 */
BYTE $0x02
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x30 /* ____, ____, m[15], ____ */ /* xx11 xxxx = 0x30 */
BYTE $0x30
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM9, YMM11 /* ____, ____, m[15], m[6] */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */
BYTE $0x30
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x01 /* m[1], m[0], ____, ____ */ /* xxxx 0001 = 0x01 */
BYTE $0x01
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 /* m[5], ____, ____, m[11] */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x03 /* m[11], m[5], ____, ____ */ /* xxxx 0011 = 0x03 */
BYTE $0x03
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[1], m[0], m[11], m[5] */ /* 0010 0000 = 0x20 */
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM10, YMM13 /* ___, m[12], m[2], ____ */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x09 /* m[12], m[2], ____, ____ */ /* xxxx 1001 = 0x09 */
BYTE $0x09
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 /* ____, ____, m[7], m[3] */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */
BYTE $0x30
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 3
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM11, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21
BYTE $0x21
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc2 // VPUNPCKLQDQ YMM8, YMM12, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30
BYTE $0x30
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM12, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM13, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 4
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM11, YMM10
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM13, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21
BYTE $0x21
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM12, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x08
BYTE $0x08
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20
BYTE $0x20
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21
BYTE $0x21
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc4 // VPUNPCKLQDQ YMM8, YMM11, YMM12
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 5
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM12, YMM11
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30
BYTE $0x30
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20
BYTE $0x20
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM13, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM11, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM10, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM12, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20
BYTE $0x20
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 6
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM10, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x21
BYTE $0x21
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc4 // VPUNPCKLQDQ YMM8, YMM13, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM12, YMM10
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30
BYTE $0x30
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM13, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30
BYTE $0x30
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM13, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM13, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30
BYTE $0x30
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 7
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x55
BYTE $0x55
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM13, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM13, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x30
BYTE $0x30
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc5 // VPUNPCKHQDQ YMM8, YMM11, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xaa
BYTE $0xaa
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM13, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x20
BYTE $0x20
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x01
BYTE $0x01
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ YMM8, YMM11, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31
BYTE $0x31
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 8
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc3 // VPUNPCKHQDQ YMM8, YMM13, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ YMM9, YMM13, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0xaa
BYTE $0xaa
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM10, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xc5 // VPUNPCKHQDQ YMM8, YMM11, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6c; BYTE $0xca // VPUNPCKLQDQ YMM9, YMM12, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x0c
BYTE $0x0c
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xcc // VPUNPCKLQDQ YMM9, YMM11, YMM12
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30
BYTE $0x30
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 9
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xca // VPERMQ YMM9, YMM10, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM12, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc4 // VPUNPCKHQDQ YMM8, YMM13, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0x00
BYTE $0x00
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31
BYTE $0x31
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcc // VPERMQ YMM9, YMM12, 0xaa
BYTE $0xaa
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xc9 // VPUNPCKHQDQ YMM9, YMM10, YMM9
BYTE $0xc4; BYTE $0xc3; BYTE $0x15; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM13, YMM9, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc3 // VPERMQ YMM8, YMM11, 0xff
BYTE $0xff
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ YMM8, YMM10, YMM8
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x04
BYTE $0x04
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 10
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x20
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc3 // VPUNPCKLQDQ YMM8, YMM10, YMM11
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcb // VPERMQ YMM9, YMM11, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x31
BYTE $0x31
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6d; BYTE $0xc4 // VPUNPCKHQDQ YMM8, YMM13, YMM12
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM10, YMM13
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x60
BYTE $0x60
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x31
BYTE $0x31
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc5 // VPERMQ YMM8, YMM13, 0xaa
BYTE $0xaa
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xc0 // VPUNPCKHQDQ YMM8, YMM12, YMM8
BYTE $0xc4; BYTE $0x41; BYTE $0x15; BYTE $0x6c; BYTE $0xca // VPUNPCKLQDQ YMM9, YMM13, YMM10
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x21
BYTE $0x21
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 1
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6c; BYTE $0xe3 // VPUNPCKLQDQ YMM4, YMM10, YMM11 /* m[0], m[4], m[2], m[6] */
BYTE $0xc4; BYTE $0xc1; BYTE $0x2d; BYTE $0x6d; BYTE $0xeb // VPUNPCKHQDQ YMM5, YMM10, YMM11 /* m[1], m[5], m[3], m[7] */
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6c; BYTE $0xe5 // VPUNPCKLQDQ YMM4, YMM12, YMM13 /* m[8], m[12], m[10], m[14] */
BYTE $0xc4; BYTE $0xc1; BYTE $0x1d; BYTE $0x6d; BYTE $0xed // VPUNPCKHQDQ YMM5, YMM12, YMM13 /* m[9], m[13], m[11], m[15] */
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xe4 // VPERMQ YMM4, YMM4, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xed // VPERMQ YMM5, YMM5, 0xd8 /* 0x1101 1000 = 0xd8 */
BYTE $0xd8
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 2
///////////////////////////////////////////////////////////////////////////
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM11, YMM13 /* m[4], ____, ____, m[14] */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x03 /* m[14], m[4], ____, ____ */ /* xxxx 0011 = 0x03 */
BYTE $0x03
BYTE $0xc4; BYTE $0x41; BYTE $0x1d; BYTE $0x6d; BYTE $0xcd // VPUNPCKHQDQ YMM9, YMM12, YMM13 /* m[9], m[13], ____, ____ */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[9], m[13], ____, ____ */ /* 0010 0000 = 0x20 */
BYTE $0x20
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc4 // VPERMQ YMM8, YMM12, 0x02 /* m[10], m[8], ____, ____ */ /* xxxx 0010 = 0x02 */
BYTE $0x02
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xcd // VPERMQ YMM9, YMM13, 0x30 /* ____, ____, m[15], ____ */ /* xx11 xxxx = 0x30 */
BYTE $0x30
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6c; BYTE $0xcb // VPUNPCKLQDQ YMM9, YMM9, YMM11 /* ____, ____, m[15], m[6] */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */
BYTE $0x30
G1
G2
DIAGONALIZE
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc2 // VPERMQ YMM8, YMM10, 0x01 /* m[1], m[0], ____, ____ */ /* xxxx 0001 = 0x01 */
BYTE $0x01
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xcc // VPUNPCKHQDQ YMM9, YMM11, YMM12 /* m[5], ____, ____, m[11] */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc9 // VPERMQ YMM9, YMM9, 0x03 /* m[11], m[5], ____, ____ */ /* xxxx 0011 = 0x03 */
BYTE $0x03
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe1 // VPERM2I128 YMM4, YMM8, YMM9, 0x20 /* m[1], m[0], m[11], m[5] */ /* 0010 0000 = 0x20 */
BYTE $0x20
BYTE $0xc4; BYTE $0x41; BYTE $0x2d; BYTE $0x6c; BYTE $0xc5 // VPUNPCKLQDQ YMM8, YMM10, YMM13 /* ___, m[12], m[2], ____ */
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0 // VPERMQ YMM8, YMM8, 0x09 /* m[12], m[2], ____, ____ */ /* xxxx 1001 = 0x09 */
BYTE $0x09
BYTE $0xc4; BYTE $0x41; BYTE $0x25; BYTE $0x6d; BYTE $0xca // VPUNPCKHQDQ YMM9, YMM11, YMM10 /* ____, ____, m[7], m[3] */
BYTE $0xc4; BYTE $0xc3; BYTE $0x3d; BYTE $0x46; BYTE $0xe9 // VPERM2I128 YMM5, YMM8, YMM9, 0x30 /* m[9], m[13], m[15], m[6] */ /* 0011 0000 = 0x30 */
BYTE $0x30
G1
G2
UNDIAGONALIZE
// Reload digest (most current value store in &out)
MOVQ out+144(FP), SI // SI: &in
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x26 // VMOVDQU YMM12, [rsi]
BYTE $0xc5; BYTE $0x7e; BYTE $0x6f; BYTE $0x6e; BYTE $0x20 // VMOVDQU YMM13, 32[rsi]
BYTE $0xc5; BYTE $0xfd; BYTE $0xef; BYTE $0xc2 // VPXOR YMM0,YMM0,YMM2 /* X0 = X0 ^ X4, X1 = X1 ^ X5 */
BYTE $0xc4; BYTE $0xc1; BYTE $0x7d; BYTE $0xef; BYTE $0xc4 // VPXOR YMM0,YMM0,YMM12 /* X0 = X0 ^ X12, X1 = X1 ^ X13 */
BYTE $0xc5; BYTE $0xf5; BYTE $0xef; BYTE $0xcb // VPXOR YMM1,YMM1,YMM3 /* X2 = X2 ^ X6, X3 = X3 ^ X7 */
BYTE $0xc4; BYTE $0xc1; BYTE $0x75; BYTE $0xef; BYTE $0xcd // VPXOR YMM1,YMM1,YMM13 /* X2 = X2 ^ X14, X3 = X3 ^ X15 */
// Store digest into &out
MOVQ out+144(FP), SI // SI: &out
BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x06 // VMOVDQU [rsi], YMM0
BYTE $0xc5; BYTE $0xfe; BYTE $0x7f; BYTE $0x4e; BYTE $0x20 // VMOVDQU 32[rsi], YMM1
// Increment message pointer and check if there's more to do
ADDQ $128, DX // message += 128
SUBQ $1, R8
JNZ loop
complete:
BYTE $0xc5; BYTE $0xf8; BYTE $0x77 // VZEROUPPER /* Prevent further context switches */
RET

View file

@ -1,41 +0,0 @@
//+build !noasm
//+build !appengine
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
//go:noescape
func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64)
func compressAVX(d *digest, p []uint8) {
var (
in [8]uint64
out [8]uint64
shffle [2]uint64
)
// vector for PSHUFB instruction
shffle[0] = 0x0201000706050403
shffle[1] = 0x0a09080f0e0d0c0b
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
blockAVXLoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
}

View file

@ -1,682 +0,0 @@
//+build !noasm !appengine
//
// Minio Cloud Storage, (C) 2016 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
// Based on SSE implementation from https://github.com/BLAKE2/BLAKE2/blob/master/sse/blake2b.c
//
// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent
//
// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as:
// #define ROUND(r) \
// LOAD_MSG_ ##r ##_1(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_2(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
// LOAD_MSG_ ##r ##_3(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_4(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
//
// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go
//
// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly)
//
// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and
// rounds 2 & 12 are identical)
//
#define G1 \
\ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
LONG $0xd479c1c4; BYTE $0xc0 \ // VPADDQ XMM0,XMM0,XMM8 /* v0 += m[0], v1 += m[2] */
LONG $0xd471c1c4; BYTE $0xc9 \ // VPADDQ XMM1,XMM1,XMM9 /* v2 += m[4], v3 += m[6] */
LONG $0xc2d4f9c5 \ // VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */
LONG $0xcbd4f1c5 \ // VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */
LONG $0xf0efc9c5 \ // VPXOR XMM6,XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */
LONG $0xf9efc1c5 \ // VPXOR XMM7,XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */
LONG $0xf670f9c5; BYTE $0xb1 \ // VPSHUFD XMM6,XMM6,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = v13<<(64-32) | v13>>32 */
LONG $0xff70f9c5; BYTE $0xb1 \ // VPSHUFD XMM7,XMM7,0xb1 /* v14 = v14<<(64-32) | v14>>32, v15 = v15<<(64-32) | v15>>32 */
LONG $0xe6d4d9c5 \ // VPADDQ XMM4,XMM4,XMM6 /* v8 += v12, v9 += v13 */
LONG $0xefd4d1c5 \ // VPADDQ XMM5,XMM5,XMM7 /* v10 += v14, v11 += v15 */
LONG $0xd4efe9c5 \ // VPXOR XMM2,XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */
LONG $0xddefe1c5 \ // VPXOR XMM3,XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */
LONG $0x0069c2c4; BYTE $0xd4 \ // VPSHUFB XMM2,XMM2,XMM12 /* v4 = v4<<(64-24) | v4>>24, v5 = v5<<(64-24) | v5>>24 */
LONG $0x0061c2c4; BYTE $0xdc // VPSHUFB XMM3,XMM3,XMM12 /* v6 = v6<<(64-24) | v6>>24, v7 = v7<<(64-24) | v7>>24 */
#define G2 \
\ // G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
LONG $0xd479c1c4; BYTE $0xc2 \ // VPADDQ XMM0,XMM0,XMM10 /* v0 += m[1], v1 += m[3] */
LONG $0xd471c1c4; BYTE $0xcb \ // VPADDQ XMM1,XMM1,XMM11 /* v2 += m[5], v3 += m[7] */
LONG $0xc2d4f9c5 \ // VPADDQ XMM0,XMM0,XMM2 /* v0 += v4, v1 += v5 */
LONG $0xcbd4f1c5 \ // VPADDQ XMM1,XMM1,XMM3 /* v2 += v6, v3 += v7 */
LONG $0xf0efc9c5 \ // VPXOR XMM6,XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */
LONG $0xf9efc1c5 \ // VPXOR XMM7,XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */
LONG $0xf670fbc5; BYTE $0x39 \ // VPSHUFLW XMM6,XMM6,0x39 /* combined with next ... */
LONG $0xf670fac5; BYTE $0x39 \ // VPSHUFHW XMM6,XMM6,0x39 /* v12 = v12<<(64-16) | v12>>16, v13 = v13<<(64-16) | v13>>16 */
LONG $0xff70fbc5; BYTE $0x39 \ // VPSHUFLW XMM7,XMM7,0x39 /* combined with next ... */
LONG $0xff70fac5; BYTE $0x39 \ // VPSHUFHW XMM7,XMM7,0x39 /* v14 = v14<<(64-16) | v14>>16, v15 = v15<<(64-16) | v15>>16 */
LONG $0xe6d4d9c5 \ // VPADDQ XMM4,XMM4,XMM6 /* v8 += v12, v9 += v13 */
LONG $0xefd4d1c5 \ // VPADDQ XMM5,XMM5,XMM7 /* v10 += v14, v11 += v15 */
LONG $0xd4efe9c5 \ // VPXOR XMM2,XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */
LONG $0xddefe1c5 \ // VPXOR XMM3,XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */
LONG $0xfad469c5 \ // VPADDQ XMM15,XMM2,XMM2 /* temp reg = reg*2 */
LONG $0xd273e9c5; BYTE $0x3f \ // VPSRLQ XMM2,XMM2,0x3f /* reg = reg>>63 */
LONG $0xef69c1c4; BYTE $0xd7 \ // VPXOR XMM2,XMM2,XMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */
LONG $0xfbd461c5 \ // VPADDQ XMM15,XMM3,XMM3 /* temp reg = reg*2 */
LONG $0xd373e1c5; BYTE $0x3f \ // VPSRLQ XMM3,XMM3,0x3f /* reg = reg>>63 */
LONG $0xef61c1c4; BYTE $0xdf // VPXOR XMM3,XMM3,XMM15 /* ORed together: v6 = v6<<(64-63) | v6>>63, v7 = v7<<(64-63) | v7>>63 */
#define DIAGONALIZE \
\ // DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
MOVOU X6, X13 \ /* t0 = row4l;\ */
MOVOU X2, X14 \ /* t1 = row2l;\ */
MOVOU X4, X6 \ /* row4l = row3l;\ */
MOVOU X5, X4 \ /* row3l = row3h;\ */
MOVOU X6, X5 \ /* row3h = row4l;\ */
LONG $0x6c1141c4; BYTE $0xfd \ // VPUNPCKLQDQ XMM15, XMM13, XMM13 /* _mm_unpacklo_epi64(t0, t0) */
LONG $0x6d41c1c4; BYTE $0xf7 \ // VPUNPCKHQDQ XMM6, XMM7, XMM15 /* row4l = _mm_unpackhi_epi64(row4h, ); \ */
LONG $0xff6c41c5 \ // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */
LONG $0x6d11c1c4; BYTE $0xff \ // VPUNPCKHQDQ XMM7, XMM13, XMM15 /* row4h = _mm_unpackhi_epi64(t0, ); \ */
LONG $0xfb6c61c5 \ // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */
LONG $0x6d69c1c4; BYTE $0xd7 \ // VPUNPCKHQDQ XMM2, XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2l, ); \ */
LONG $0x6c0941c4; BYTE $0xfe \ // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
LONG $0x6d61c1c4; BYTE $0xdf // VPUNPCKHQDQ XMM3, XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */
#define UNDIAGONALIZE \
\ // UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
MOVOU X4, X13 \ /* t0 = row3l;\ */
MOVOU X5, X4 \ /* row3l = row3h;\ */
MOVOU X13, X5 \ /* row3h = t0;\ */
MOVOU X2, X13 \ /* t0 = row2l;\ */
MOVOU X6, X14 \ /* t1 = row4l;\ */
LONG $0xfa6c69c5 \ // VPUNPCKLQDQ XMM15, XMM2, XMM2 /* _mm_unpacklo_epi64(row2l, row2l) */
LONG $0x6d61c1c4; BYTE $0xd7 \ // VPUNPCKHQDQ XMM2, XMM3, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, ); \ */
LONG $0xfb6c61c5 \ // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */
LONG $0x6d11c1c4; BYTE $0xdf \ // VPUNPCKHQDQ XMM3, XMM13, XMM15 /* row2h = _mm_unpackhi_epi64(t0, ); \ */
LONG $0xff6c41c5 \ // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */
LONG $0x6d49c1c4; BYTE $0xf7 \ // VPUNPCKHQDQ XMM6, XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, ); \ */
LONG $0x6c0941c4; BYTE $0xfe \ // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
LONG $0x6d41c1c4; BYTE $0xff // VPUNPCKHQDQ XMM7, XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, ) */
#define LOAD_SHUFFLE \
\ // Load shuffle value
MOVQ shffle+120(FP), SI \ // SI: &shuffle
MOVOU 0(SI), X12 // X12 = 03040506 07000102 0b0c0d0e 0f08090a
// func blockAVXLoop(p []uint8, in, iv, t, f, shffle, out []uint64)
TEXT ·blockAVXLoop(SB), 7, $0
// REGISTER USE
// R8: loop counter
// DX: message pointer
// SI: temp pointer for loading
// X0 - X7: v0 - v15
// X8 - X11: m[0] - m[7]
// X12: shuffle value
// X13 - X15: temp registers
// Load digest
MOVQ in+24(FP), SI // SI: &in
MOVOU 0(SI), X0 // X0 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */
MOVOU 16(SI), X1 // X1 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */
MOVOU 32(SI), X2 // X2 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */
MOVOU 48(SI), X3 // X3 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */
// Already store digest into &out (so we can reload it later generically)
MOVQ out+144(FP), SI // SI: &out
MOVOU X0, 0(SI) // out[0]+out[1] = X0
MOVOU X1, 16(SI) // out[2]+out[3] = X1
MOVOU X2, 32(SI) // out[4]+out[5] = X2
MOVOU X3, 48(SI) // out[6]+out[7] = X3
// Initialize message pointer and loop counter
MOVQ message+0(FP), DX // DX: &p (message)
MOVQ message_len+8(FP), R8 // R8: len(message)
SHRQ $7, R8 // len(message) / 128
CMPQ R8, $0
JEQ complete
loop:
// Increment counter
MOVQ t+72(FP), SI // SI: &t
MOVQ 0(SI), R9
ADDQ $128, R9 // /* d.t[0] += BlockSize */
MOVQ R9, 0(SI)
CMPQ R9, $128 // /* if d.t[0] < BlockSize { */
JGE noincr
MOVQ 8(SI), R9
ADDQ $1, R9 // /* d.t[1]++ */
MOVQ R9, 8(SI)
noincr: // /* } */
// Load initialization vector
MOVQ iv+48(FP), SI // SI: &iv
MOVOU 0(SI), X4 // X4 = iv[0]+iv[1] /* row3l = LOAD( &blake2b_IV[0] ); */
MOVOU 16(SI), X5 // X5 = iv[2]+iv[3] /* row3h = LOAD( &blake2b_IV[2] ); */
MOVOU 32(SI), X6 // X6 = iv[4]+iv[5] /* LOAD( &blake2b_IV[4] ) */
MOVOU 48(SI), X7 // X7 = iv[6]+iv[7] /* LOAD( &blake2b_IV[6] ) */
MOVQ t+72(FP), SI // SI: &t
MOVOU 0(SI), X8 // X8 = t[0]+t[1] /* LOAD( &S->t[0] ) */
PXOR X8, X6 // X6 = X6 ^ X8 /* row4l = _mm_xor_si128( , ); */
MOVQ t+96(FP), SI // SI: &f
MOVOU 0(SI), X8 // X8 = f[0]+f[1] /* LOAD( &S->f[0] ) */
PXOR X8, X7 // X7 = X7 ^ X8 /* row4h = _mm_xor_si128( , ); */
///////////////////////////////////////////////////////////////////////////
// R O U N D 1
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+m[1]
MOVOU 16(DX), X13 // X13 = m[2]+m[3]
MOVOU 32(DX), X14 // X14 = m[4]+m[5]
MOVOU 48(DX), X15 // X15 = m[6]+m[7]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[2] */
LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[4], m[6] */
LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[1], m[3] */
LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[5], m[7] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[8],m[10] */
LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[12],m[14] */
LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[9],m[11] */
LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[13],m[15] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 2
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 112(DX), X12 // X12 = m[14]+m[15]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[14], m[4] */
LONG $0x6d0941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM14, XMM15 /* m[9], m[13] */
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 48(DX), X15 // X15 = m[6]+ m[7]
LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[10], m[8] */
LONG $0x0f0143c4; WORD $0x08dc // VPALIGNR XMM11, XMM15, XMM12, 0x8 /* m[15], m[6] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
LONG $0x0f1943c4; WORD $0x08c4 // VPALIGNR XMM8, XMM12, XMM12, 0x8 /* m[1], m[0] */
LONG $0x6d0941c4; BYTE $0xcd // VPUNPCKHQDQ XMM9, XMM14, XMM13 /* m[11], m[5] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[2] */
LONG $0x6d1141c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM13, XMM12 /* m[7], m[3] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 3
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 32(DX), X12 // X12 = m[4]+ m[5]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x0f0943c4; WORD $0x08c5 // VPALIGNR XMM8, XMM14, XMM13, 0x8 /* m[11], m[12] */
LONG $0x6d1941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM12, XMM15 /* m[5], m[15] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
LONG $0x6c0141c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM15, XMM12 /* m[8], m[0] */
LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[13] */
LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[2], ___ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6d1941c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM12, XMM12 /* ___, m[3] */
LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[10], ___ */
LONG $0x6d1141c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM13, XMM14 /* m[7], m[9] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X14 // X14 = m[4]+ m[5]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6c0141c4; BYTE $0xd5 // VPUNPCKLQDQ XMM10, XMM15, XMM13 /* m[14], m[6] */
LONG $0x0f0943c4; WORD $0x08dc // VPALIGNR XMM11, XMM14, XMM12, 0x8 /* m[1], m[4] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 4
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
LONG $0x6d1141c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM13, XMM12 /* m[7], m[3] */
LONG $0x6d0141c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM15, XMM14 /* m[13], m[11] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 112(DX), X14 // X14 = m[14]+m[15]
LONG $0x6d1141c4; BYTE $0xd4 // VPUNPCKHQDQ XMM10, XMM13, XMM12 /* m[9], m[1] */
LONG $0x6c0141c4; BYTE $0xde // VPUNPCKLQDQ XMM11, XMM15, XMM14 /* m[12], m[14] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d1141c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM13, XMM13 /* ___, m[5] */
LONG $0x6c1941c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM12, XMM8 /* m[2], ____ */
LONG $0x6d0141c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM15, XMM15 /* ___, m[15] */
LONG $0x6c1141c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM13, XMM9 /* m[4], ____ */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[6], m[10] */
LONG $0x6c1941c4; BYTE $0xdf // VPUNPCKLQDQ XMM11, XMM12, XMM15 /* m[0], m[8] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 5
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6d0941c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM14, XMM13 /* m[9], m[5] */
LONG $0x6c1941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM12, XMM15 /* m[2], m[10] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0941c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM14, XMM14 /* ___, m[7] */
LONG $0x6c1941c4; BYTE $0xd2 // VPUNPCKLQDQ XMM10, XMM12, XMM10 /* m[0], ____ */
LONG $0x6d0141c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM15, XMM15 /* ___, m[15] */
LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[4], ____ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0941c4; BYTE $0xc6 // VPUNPCKHQDQ XMM8, XMM14, XMM14 /* ___, m[11] */
LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[14], ____ */
LONG $0x6d1941c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM12, XMM12 /* ___, m[3] */
LONG $0x6c1141c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM13, XMM9 /* m[6], ____ */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x0f0943c4; WORD $0x08d4 // VPALIGNR XMM10, XMM14, XMM12, 0x8 /* m[1], m[12] */
LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[13] */
LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[8], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 6
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
LONG $0x6c1141c4; BYTE $0xc6 // VPUNPCKLQDQ XMM8, XMM13, XMM14 /* m[2], m[6] */
LONG $0x6c1941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM12, XMM15 /* m[0], m[8] */
MOVOU 80(DX), X12 // X12 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[10] */
LONG $0x6d1941c4; BYTE $0xdd // VPUNPCKHQDQ XMM11, XMM12, XMM13 /* m[11], m[3] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0941c4; BYTE $0xc6 // VPUNPCKHQDQ XMM8, XMM14, XMM14 /* ___, m[7] */
LONG $0x6c1141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM13, XMM8 /* m[4], ____ */
LONG $0x6d0141c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM15, XMM12 /* m[15], m[1] */
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x6d0941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM14, XMM13 /* m[13], m[5] */
LONG $0x6d1941c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM12, XMM12 /* ___, m[9] */
LONG $0x6c0141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM15, XMM11 /* m[14], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 7
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d1941c4; BYTE $0xc4 // VPUNPCKHQDQ XMM8, XMM12, XMM12 /* ___, m[1] */
LONG $0x6c0941c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM14, XMM8 /* m[12], ____ */
LONG $0x6c0141c4; BYTE $0xcd // VPUNPCKLQDQ XMM9, XMM15, XMM13 /* m[14], m[4] */
MOVOU 80(DX), X12 // X12 = m[10]+m[11]
LONG $0x6d1141c4; BYTE $0xd7 // VPUNPCKHQDQ XMM10, XMM13, XMM15 /* m[5], m[15] */
LONG $0x0f1943c4; WORD $0x08de // VPALIGNR XMM11, XMM12, XMM14, 0x8 /* m[13], m[10] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[6] */
LONG $0x0f0943c4; WORD $0x08ce // VPALIGNR XMM9, XMM14, XMM14, 0x8 /* m[9], m[8] */
MOVOU 16(DX), X14 // X14 = m[2]+ m[3]
LONG $0x6d1141c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM13, XMM14 /* m[7], m[3] */
LONG $0x6d0141c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM15, XMM15 /* ___, m[11] */
LONG $0x6c0941c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM14, XMM11 /* m[2], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 8
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0941c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM14, XMM13 /* m[13], m[7] */
LONG $0x6d1941c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM12, XMM12 /* ___, m[3] */
LONG $0x6c0941c4; BYTE $0xc9 // VPUNPCKLQDQ XMM9, XMM14, XMM9 /* m[12], ____ */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
LONG $0x0f0143c4; WORD $0x08d6 // VPALIGNR XMM10, XMM15, XMM14, 0x8 /* m[11], m[14] */
LONG $0x6d1941c4; BYTE $0xdd // VPUNPCKHQDQ XMM11, XMM12, XMM13 /* m[1], m[9] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d1141c4; BYTE $0xc7 // VPUNPCKHQDQ XMM8, XMM13, XMM15 /* m[5], m[15] */
LONG $0x6c0941c4; BYTE $0xcc // VPUNPCKLQDQ XMM9, XMM14, XMM12 /* m[8], m[2] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6c1941c4; BYTE $0xd5 // VPUNPCKLQDQ XMM10, XMM12, XMM13 /* m[0], m[4] */
LONG $0x6c0941c4; BYTE $0xdf // VPUNPCKLQDQ XMM11, XMM14, XMM15 /* m[6], m[10] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 9
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6c1141c4; BYTE $0xc7 // VPUNPCKLQDQ XMM8, XMM13, XMM15 /* m[6], m[14] */
LONG $0x0f1943c4; WORD $0x08ce // VPALIGNR XMM9, XMM12, XMM14, 0x8 /* m[11], m[0] */
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
LONG $0x6d0141c4; BYTE $0xd6 // VPUNPCKHQDQ XMM10, XMM15, XMM14 /* m[15], m[9] */
LONG $0x0f0943c4; WORD $0x08dd // VPALIGNR XMM11, XMM14, XMM13, 0x8 /* m[3], m[8] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
LONG $0x6d0141c4; BYTE $0xc7 // VPUNPCKHQDQ XMM8, XMM15, XMM15 /* ___, m[13] */
LONG $0x6c0141c4; BYTE $0xc0 // VPUNPCKLQDQ XMM8, XMM15, XMM8 /* m[12], ____ */
LONG $0x0f0943c4; WORD $0x08cc // VPALIGNR XMM9, XMM14, XMM12, 0x8 /* m[1], m[10] */
MOVOU 32(DX), X12 // X12 = m[4]+ m[5]
MOVOU 48(DX), X15 // X15 = m[6]+ m[7]
LONG $0x6d0141c4; BYTE $0xd7 // VPUNPCKHQDQ XMM10, XMM15, XMM15 /* ___, m[7] */
LONG $0x6c1141c4; BYTE $0xd2 // VPUNPCKLQDQ XMM10, XMM13, XMM10 /* m[2], ____ */
LONG $0x6d1941c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM12, XMM12 /* ___, m[5] */
LONG $0x6c1941c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM12, XMM11 /* m[4], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 0
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6c0141c4; BYTE $0xc6 // VPUNPCKLQDQ XMM8, XMM15, XMM14 /* m[10], m[8] */
LONG $0x6d1141c4; BYTE $0xcc // VPUNPCKHQDQ XMM9, XMM13, XMM12 /* m[7], m[1] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X14 // X14 = m[4]+ m[5]
LONG $0x6c1941c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM12, XMM14 /* m[2], m[4] */
LONG $0x6d0941c4; BYTE $0xde // VPUNPCKHQDQ XMM11, XMM14, XMM14 /* ___, m[5] */
LONG $0x6c1141c4; BYTE $0xdb // VPUNPCKLQDQ XMM11, XMM13, XMM11 /* m[6], ____ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0141c4; BYTE $0xc5 // VPUNPCKHQDQ XMM8, XMM15, XMM13 /* m[15], m[9] */
LONG $0x6d1941c4; BYTE $0xce // VPUNPCKHQDQ XMM9, XMM12, XMM14 /* m[3], m[13] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
LONG $0x0f0143c4; WORD $0x08d5 // VPALIGNR XMM10, XMM15, XMM13, 0x8 /* m[11], m[14] */
LONG $0x6c0941c4; BYTE $0xdc // VPUNPCKLQDQ XMM11, XMM14, XMM12 /* m[12], m[0] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 1
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+m[1]
MOVOU 16(DX), X13 // X13 = m[2]+m[3]
MOVOU 32(DX), X14 // X14 = m[4]+m[5]
MOVOU 48(DX), X15 // X15 = m[6]+m[7]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[0], m[2] */
LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[4], m[6] */
LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[1], m[3] */
LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[5], m[7] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[8],m[10] */
LONG $0x6c0941c4; BYTE $0xcf // VPUNPCKLQDQ XMM9, XMM14, XMM15 /* m[12],m[14] */
LONG $0x6d1941c4; BYTE $0xd5 // VPUNPCKHQDQ XMM10, XMM12, XMM13 /* m[9],m[11] */
LONG $0x6d0941c4; BYTE $0xdf // VPUNPCKHQDQ XMM11, XMM14, XMM15 /* m[13],m[15] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 2
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 112(DX), X12 // X12 = m[14]+m[15]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
LONG $0x6c1941c4; BYTE $0xc5 // VPUNPCKLQDQ XMM8, XMM12, XMM13 /* m[14], m[4] */
LONG $0x6d0941c4; BYTE $0xcf // VPUNPCKHQDQ XMM9, XMM14, XMM15 /* m[9], m[13] */
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 48(DX), X15 // X15 = m[6]+ m[7]
LONG $0x6c1141c4; BYTE $0xd6 // VPUNPCKLQDQ XMM10, XMM13, XMM14 /* m[10], m[8] */
LONG $0x0f0143c4; WORD $0x08dc // VPALIGNR XMM11, XMM15, XMM12, 0x8 /* m[15], m[6] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
LONG $0x0f1943c4; WORD $0x08c4 // VPALIGNR XMM8, XMM12, XMM12, 0x8 /* m[1], m[0] */
LONG $0x6d0941c4; BYTE $0xcd // VPUNPCKHQDQ XMM9, XMM14, XMM13 /* m[11], m[5] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
LONG $0x6c0941c4; BYTE $0xd4 // VPUNPCKLQDQ XMM10, XMM14, XMM12 /* m[12], m[2] */
LONG $0x6d1141c4; BYTE $0xdc // VPUNPCKHQDQ XMM11, XMM13, XMM12 /* m[7], m[3] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
// Reload digest (most current value store in &out)
MOVQ out+144(FP), SI // SI: &in
MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */
MOVOU 16(SI), X13 // X13 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */
MOVOU 32(SI), X14 // X14 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */
MOVOU 48(SI), X15 // X15 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */
// Final computations and prepare for storing
PXOR X4, X0 // X0 = X0 ^ X4 /* row1l = _mm_xor_si128( row3l, row1l ); */
PXOR X5, X1 // X1 = X1 ^ X5 /* row1h = _mm_xor_si128( row3h, row1h ); */
PXOR X12, X0 // X0 = X0 ^ X12 /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */
PXOR X13, X1 // X1 = X1 ^ X13 /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */
PXOR X6, X2 // X2 = X2 ^ X6 /* row2l = _mm_xor_si128( row4l, row2l ); */
PXOR X7, X3 // X3 = X3 ^ X7 /* row2h = _mm_xor_si128( row4h, row2h ); */
PXOR X14, X2 // X2 = X2 ^ X14 /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */
PXOR X15, X3 // X3 = X3 ^ X15 /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */
// Store digest into &out
MOVQ out+144(FP), SI // SI: &out
MOVOU X0, 0(SI) // out[0]+out[1] = X0
MOVOU X1, 16(SI) // out[2]+out[3] = X1
MOVOU X2, 32(SI) // out[4]+out[5] = X2
MOVOU X3, 48(SI) // out[6]+out[7] = X3
// Increment message pointer and check if there's more to do
ADDQ $128, DX // message += 128
SUBQ $1, R8
JNZ loop
complete:
RET

View file

@ -1,41 +0,0 @@
//+build !noasm
//+build !appengine
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
//go:noescape
func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64)
func compressSSE(d *digest, p []uint8) {
var (
in [8]uint64
out [8]uint64
shffle [2]uint64
)
// vector for PSHUFB instruction
shffle[0] = 0x0201000706050403
shffle[1] = 0x0a09080f0e0d0c0b
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
blockSSELoop(p, in[:], iv[:], d.t[:], d.f[:], shffle[:], out[:])
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
}

View file

@ -1,770 +0,0 @@
//+build !noasm !appengine
//
// Minio Cloud Storage, (C) 2016 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
// Based on SSE implementation from https://github.com/BLAKE2/BLAKE2/blob/master/sse/blake2b.c
//
// Use github.com/fwessels/asm2plan9s on this file to assemble instructions to their Plan9 equivalent
//
// Assembly code below essentially follows the ROUND macro (see blake2b-round.h) which is defined as:
// #define ROUND(r) \
// LOAD_MSG_ ##r ##_1(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_2(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
// LOAD_MSG_ ##r ##_3(b0, b1); \
// G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// LOAD_MSG_ ##r ##_4(b0, b1); \
// G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
//
// as well as the go equivalent in https://github.com/dchest/blake2b/blob/master/block.go
//
// As in the macro, G1/G2 in the 1st and 2nd half are identical (so literal copy of assembly)
//
// Rounds are also the same, except for the loading of the message (and rounds 1 & 11 and
// rounds 2 & 12 are identical)
//
#define G1 \
\ // G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
LONG $0xd40f4166; BYTE $0xc0 \ // PADDQ XMM0,XMM8 /* v0 += m[0], v1 += m[2] */
LONG $0xd40f4166; BYTE $0xc9 \ // PADDQ XMM1,XMM9 /* v2 += m[4], v3 += m[6] */
LONG $0xc2d40f66 \ // PADDQ XMM0,XMM2 /* v0 += v4, v1 += v5 */
LONG $0xcbd40f66 \ // PADDQ XMM1,XMM3 /* v2 += v6, v3 += v7 */
LONG $0xf0ef0f66 \ // PXOR XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */
LONG $0xf9ef0f66 \ // PXOR XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */
LONG $0xf6700f66; BYTE $0xb1 \ // PSHUFD XMM6,XMM6,0xb1 /* v12 = v12<<(64-32) | v12>>32, v13 = v13<<(64-32) | v13>>32 */
LONG $0xff700f66; BYTE $0xb1 \ // PSHUFD XMM7,XMM7,0xb1 /* v14 = v14<<(64-32) | v14>>32, v15 = v15<<(64-32) | v15>>32 */
LONG $0xe6d40f66 \ // PADDQ XMM4,XMM6 /* v8 += v12, v9 += v13 */
LONG $0xefd40f66 \ // PADDQ XMM5,XMM7 /* v10 += v14, v11 += v15 */
LONG $0xd4ef0f66 \ // PXOR XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */
LONG $0xddef0f66 \ // PXOR XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */
LONG $0x380f4166; WORD $0xd400 \ // PSHUFB XMM2,XMM12 /* v4 = v4<<(64-24) | v4>>24, v5 = v5<<(64-24) | v5>>24 */
LONG $0x380f4166; WORD $0xdc00 // PSHUFB XMM3,XMM12 /* v6 = v6<<(64-24) | v6>>24, v7 = v7<<(64-24) | v7>>24 */
#define G2 \
\ // G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1);
LONG $0xd40f4166; BYTE $0xc2 \ // PADDQ XMM0,XMM10 /* v0 += m[1], v1 += m[3] */
LONG $0xd40f4166; BYTE $0xcb \ // PADDQ XMM1,XMM11 /* v2 += m[5], v3 += m[7] */
LONG $0xc2d40f66 \ // PADDQ XMM0,XMM2 /* v0 += v4, v1 += v5 */
LONG $0xcbd40f66 \ // PADDQ XMM1,XMM3 /* v2 += v6, v3 += v7 */
LONG $0xf0ef0f66 \ // PXOR XMM6,XMM0 /* v12 ^= v0, v13 ^= v1 */
LONG $0xf9ef0f66 \ // PXOR XMM7,XMM1 /* v14 ^= v2, v15 ^= v3 */
LONG $0xf6700ff2; BYTE $0x39 \ // PSHUFLW XMM6,XMM6,0x39 /* combined with next ... */
LONG $0xf6700ff3; BYTE $0x39 \ // PSHUFHW XMM6,XMM6,0x39 /* v12 = v12<<(64-16) | v12>>16, v13 = v13<<(64-16) | v13>>16 */
LONG $0xff700ff2; BYTE $0x39 \ // PSHUFLW XMM7,XMM7,0x39 /* combined with next ... */
LONG $0xff700ff3; BYTE $0x39 \ // PSHUFHW XMM7,XMM7,0x39 /* v14 = v14<<(64-16) | v14>>16, v15 = v15<<(64-16) | v15>>16 */
LONG $0xe6d40f66 \ // PADDQ XMM4,XMM6 /* v8 += v12, v9 += v13 */
LONG $0xefd40f66 \ // PADDQ XMM5,XMM7 /* v10 += v14, v11 += v15 */
LONG $0xd4ef0f66 \ // PXOR XMM2,XMM4 /* v4 ^= v8, v5 ^= v9 */
LONG $0xddef0f66 \ // PXOR XMM3,XMM5 /* v6 ^= v10, v7 ^= v11 */
MOVOU X2, X15 \
LONG $0xd40f4466; BYTE $0xfa \ // PADDQ XMM15,XMM2 /* temp reg = reg*2 */
LONG $0xd2730f66; BYTE $0x3f \ // PSRLQ XMM2,0x3f /* reg = reg>>63 */
LONG $0xef0f4166; BYTE $0xd7 \ // PXOR XMM2,XMM15 /* ORed together: v4 = v4<<(64-63) | v4>>63, v5 = v5<<(64-63) | v5>>63 */
MOVOU X3, X15 \
LONG $0xd40f4466; BYTE $0xfb \ // PADDQ XMM15,XMM3 /* temp reg = reg*2 */
LONG $0xd3730f66; BYTE $0x3f \ // PSRLQ XMM3,0x3f /* reg = reg>>63 */
LONG $0xef0f4166; BYTE $0xdf // PXOR XMM3,XMM15 /* ORed together: v6 = v6<<(64-63) | v6>>63, v7 = v7<<(64-63) | v7>>63 */
#define DIAGONALIZE \
\ // DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
MOVOU X6, X13 \ /* t0 = row4l;\ */
MOVOU X2, X14 \ /* t1 = row2l;\ */
MOVOU X4, X6 \ /* row4l = row3l;\ */
MOVOU X5, X4 \ /* row3l = row3h;\ */
MOVOU X6, X5 \ /* row3h = row4l;\ */
LONG $0x6c0f4566; BYTE $0xfd \ // PUNPCKLQDQ XMM15, XMM13 /* _mm_unpacklo_epi64(t0, t0) */
MOVOU X7, X6 \
LONG $0x6d0f4166; BYTE $0xf7 \ // PUNPCKHQDQ XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4h, ); \ */
LONG $0x6c0f4466; BYTE $0xff \ // PUNPCKLQDQ XMM15, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */
MOVOU X13, X7 \
LONG $0x6d0f4166; BYTE $0xff \ // PUNPCKHQDQ XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(t0, ); \ */
LONG $0x6c0f4466; BYTE $0xfb \ // PUNPCKLQDQ XMM15, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */
LONG $0x6d0f4166; BYTE $0xd7 \ // PUNPCKHQDQ XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2l, ); \ */
LONG $0x6c0f4566; BYTE $0xfe \ // PUNPCKLQDQ XMM15, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
LONG $0x6d0f4166; BYTE $0xdf // PUNPCKHQDQ XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */
#define UNDIAGONALIZE \
\ // UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
MOVOU X4, X13 \ /* t0 = row3l;\ */
MOVOU X5, X4 \ /* row3l = row3h;\ */
MOVOU X13, X5 \ /* row3h = t0;\ */
MOVOU X2, X13 \ /* t0 = row2l;\ */
MOVOU X6, X14 \ /* t1 = row4l;\ */
LONG $0x6c0f4466; BYTE $0xfa \ // PUNPCKLQDQ XMM15, XMM2 /* _mm_unpacklo_epi64(row2l, row2l) */
MOVOU X3, X2 \
LONG $0x6d0f4166; BYTE $0xd7 \ // PUNPCKHQDQ XMM2, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, ); \ */
LONG $0x6c0f4466; BYTE $0xfb \ // PUNPCKLQDQ XMM15, XMM3 /* _mm_unpacklo_epi64(row2h, row2h) */
MOVOU X13, X3 \
LONG $0x6d0f4166; BYTE $0xdf \ // PUNPCKHQDQ XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(t0, ); \ */
LONG $0x6c0f4466; BYTE $0xff \ // PUNPCKLQDQ XMM15, XMM7 /* _mm_unpacklo_epi64(row4h, row4h) */
LONG $0x6d0f4166; BYTE $0xf7 \ // PUNPCKHQDQ XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, ); \ */
LONG $0x6c0f4566; BYTE $0xfe \ // PUNPCKLQDQ XMM15, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
LONG $0x6d0f4166; BYTE $0xff // PUNPCKHQDQ XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, ) */
#define LOAD_SHUFFLE \
\ // Load shuffle value
MOVQ shffle+120(FP), SI \ // SI: &shuffle
MOVOU 0(SI), X12 // X12 = 03040506 07000102 0b0c0d0e 0f08090a
// func blockSSELoop(p []uint8, in, iv, t, f, shffle, out []uint64)
TEXT ·blockSSELoop(SB), 7, $0
// REGISTER USE
// R8: loop counter
// DX: message pointer
// SI: temp pointer for loading
// X0 - X7: v0 - v15
// X8 - X11: m[0] - m[7]
// X12: shuffle value
// X13 - X15: temp registers
// Load digest
MOVQ in+24(FP), SI // SI: &in
MOVOU 0(SI), X0 // X0 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */
MOVOU 16(SI), X1 // X1 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */
MOVOU 32(SI), X2 // X2 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */
MOVOU 48(SI), X3 // X3 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */
// Already store digest into &out (so we can reload it later generically)
MOVQ out+144(FP), SI // SI: &out
MOVOU X0, 0(SI) // out[0]+out[1] = X0
MOVOU X1, 16(SI) // out[2]+out[3] = X1
MOVOU X2, 32(SI) // out[4]+out[5] = X2
MOVOU X3, 48(SI) // out[6]+out[7] = X3
// Initialize message pointer and loop counter
MOVQ message+0(FP), DX // DX: &p (message)
MOVQ message_len+8(FP), R8 // R8: len(message)
SHRQ $7, R8 // len(message) / 128
CMPQ R8, $0
JEQ complete
loop:
// Increment counter
MOVQ t+72(FP), SI // SI: &t
MOVQ 0(SI), R9
ADDQ $128, R9 // /* d.t[0] += BlockSize */
MOVQ R9, 0(SI)
CMPQ R9, $128 // /* if d.t[0] < BlockSize { */
JGE noincr
MOVQ 8(SI), R9
ADDQ $1, R9 // /* d.t[1]++ */
MOVQ R9, 8(SI)
noincr: // /* } */
// Load initialization vector
MOVQ iv+48(FP), SI // SI: &iv
MOVOU 0(SI), X4 // X4 = iv[0]+iv[1] /* row3l = LOAD( &blake2b_IV[0] ); */
MOVOU 16(SI), X5 // X5 = iv[2]+iv[3] /* row3h = LOAD( &blake2b_IV[2] ); */
MOVOU 32(SI), X6 // X6 = iv[4]+iv[5] /* LOAD( &blake2b_IV[4] ) */
MOVOU 48(SI), X7 // X7 = iv[6]+iv[7] /* LOAD( &blake2b_IV[6] ) */
MOVQ t+72(FP), SI // SI: &t
MOVOU 0(SI), X8 // X8 = t[0]+t[1] /* LOAD( &S->t[0] ) */
PXOR X8, X6 // X6 = X6 ^ X8 /* row4l = _mm_xor_si128( , ); */
MOVQ t+96(FP), SI // SI: &f
MOVOU 0(SI), X8 // X8 = f[0]+f[1] /* LOAD( &S->f[0] ) */
PXOR X8, X7 // X7 = X7 ^ X8 /* row4h = _mm_xor_si128( , ); */
///////////////////////////////////////////////////////////////////////////
// R O U N D 1
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+m[1]
MOVOU 16(DX), X13 // X13 = m[2]+m[3]
MOVOU 32(DX), X14 // X14 = m[4]+m[5]
MOVOU 48(DX), X15 // X15 = m[6]+m[7]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[2] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[4], m[6] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[1], m[3] */
MOVOU X14, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[5], m[7] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[8],m[10] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[12],m[14] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[9],m[11] */
MOVOU X14, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[13],m[15] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 2
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 112(DX), X12 // X12 = m[14]+m[15]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[14], m[4] */
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[9], m[13] */
MOVOU 80(DX), X10 // X10 = m[10]+m[11]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[10], m[8] */
LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[15], m[6] */; ; ; ; ;
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU X12, X8
LONG $0x3a0f4566; WORD $0xc40f; BYTE $0x08 // PALIGNR XMM8, XMM12, 0x8 /* m[1], m[0] */
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* m[11], m[5] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
MOVOU 96(DX), X10 // X10 = m[12]+m[13]
LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[2] */
LONG $0x6d0f4566; BYTE $0xdc // PUNPCKHQDQ XMM11, XMM12 /* m[7], m[3] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 3
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 32(DX), X12 // X12 = m[4]+ m[5]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X14, X8
LONG $0x3a0f4566; WORD $0xc50f; BYTE $0x08 // PALIGNR XMM8, XMM13, 0x8 /* m[11], m[12] */
MOVOU X12, X9
LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[5], m[15] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 64(DX), X10 // X10 = m[8]+ m[9]
LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[8], m[0] */
LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[13] */
MOVOU X13, X11
LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[2], ___ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
MOVOU X12, X9
LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* ___, m[3] */
MOVOU X15, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[10], ___ */
MOVOU X13, X9
LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[7], m[9] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X11 // X11 = m[4]+ m[5]
MOVOU 112(DX), X10 // X10 = m[14]+m[15]
LONG $0x6c0f4566; BYTE $0xd5 // PUNPCKLQDQ XMM10, XMM13 /* m[14], m[6] */
LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[1], m[4] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 4
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
MOVOU X13, X8
LONG $0x6d0f4566; BYTE $0xc4 // PUNPCKHQDQ XMM8, XMM12 /* m[7], m[3] */
MOVOU X15, X9
LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[13], m[11] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X10 // X10 = m[8]+ m[9]
MOVOU 112(DX), X14 // X14 = m[14]+m[15]
LONG $0x6d0f4566; BYTE $0xd4 // PUNPCKHQDQ XMM10, XMM12 /* m[9], m[1] */
MOVOU X15, X11
LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[12], m[14] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X13, X9
LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* ___, m[5] */
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[2], ____ */
MOVOU X15, X10
LONG $0x6d0f4566; BYTE $0xd7 // PUNPCKHQDQ XMM10, XMM15 /* ___, m[15] */
MOVOU X13, X9
LONG $0x6c0f4566; BYTE $0xca // PUNPCKLQDQ XMM9, XMM10 /* m[4], ____ */
MOVOU 0(DX), X11 // X11 = m[0]+ m[1]
MOVOU 48(DX), X10 // X10 = m[6]+ m[7]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[6], m[10] */
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[0], m[8] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 5
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
MOVOU X14, X8
LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[9], m[5] */
MOVOU X12, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[2], m[10] */
MOVOU 0(DX), X10 // X10 = m[0]+ m[1]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[7] */
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[0], ____ */
LONG $0x6d0f4566; BYTE $0xff // PUNPCKHQDQ XMM15, XMM15 /* ___, m[15] */
MOVOU X13, X11
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[4], ____ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[11] */
MOVOU X15, X8
LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[14], ____ */
LONG $0x6d0f4566; BYTE $0xe4 // PUNPCKHQDQ XMM12, XMM12 /* ___, m[3] */
MOVOU X13, X9
LONG $0x6c0f4566; BYTE $0xcc // PUNPCKLQDQ XMM9, XMM12 /* m[6], ____ */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 64(DX), X11 // X11 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU X14, X10
LONG $0x3a0f4566; WORD $0xd40f; BYTE $0x08 // PALIGNR XMM10, XMM12, 0x8 /* m[1], m[12] */
LONG $0x6d0f4566; BYTE $0xf6 // PUNPCKHQDQ XMM14, XMM14 /* ___, m[13] */
LONG $0x6c0f4566; BYTE $0xde // PUNPCKLQDQ XMM11, XMM14 /* m[8], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 6
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 64(DX), X15 // X15 = m[8]+ m[9]
MOVOU X13, X8
LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[2], m[6] */
MOVOU X12, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[0], m[8] */
MOVOU 80(DX), X12 // X12 = m[10]+m[11]
MOVOU 96(DX), X10 // X10 = m[12]+m[13]
LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[10] */
MOVOU X12, X11
LONG $0x6d0f4566; BYTE $0xdd // PUNPCKHQDQ XMM11, XMM13 /* m[11], m[3] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 48(DX), X14 // X14 = m[6]+ m[7]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* ___, m[7] */
MOVOU X13, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[4], ____ */
MOVOU X15, X9
LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* m[15], m[1] */
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 96(DX), X10 // X10 = m[12]+m[13]
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[13], m[5] */
LONG $0x6d0f4566; BYTE $0xe4 // PUNPCKHQDQ XMM12, XMM12 /* ___, m[9] */
MOVOU X15, X11
LONG $0x6c0f4566; BYTE $0xdc // PUNPCKLQDQ XMM11, XMM12 /* m[14], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 7
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X12, X9
LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* ___, m[1] */
MOVOU X14, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[12], ____ */
MOVOU X15, X9
LONG $0x6c0f4566; BYTE $0xcd // PUNPCKLQDQ XMM9, XMM13 /* m[14], m[4] */
MOVOU 80(DX), X11 // X11 = m[10]+m[11]
MOVOU X13, X10
LONG $0x6d0f4566; BYTE $0xd7 // PUNPCKHQDQ XMM10, XMM15 /* m[5], m[15] */
LONG $0x3a0f4566; WORD $0xde0f; BYTE $0x08 // PALIGNR XMM11, XMM14, 0x8 /* m[13], m[10] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[6] */
MOVOU X14, X9
LONG $0x3a0f4566; WORD $0xce0f; BYTE $0x08 // PALIGNR XMM9, XMM14, 0x8 /* m[9], m[8] */
MOVOU 16(DX), X11 // X14 = m[2]+ m[3]
MOVOU X13, X10
LONG $0x6d0f4566; BYTE $0xd3 // PUNPCKHQDQ XMM10, XMM11 /* m[7], m[3] */
LONG $0x6d0f4566; BYTE $0xff // PUNPCKHQDQ XMM15, XMM15 /* ___, m[11] */
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[2], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 8
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X14, X8
LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[13], m[7] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd4 // PUNPCKHQDQ XMM10, XMM12 /* ___, m[3] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xca // PUNPCKLQDQ XMM9, XMM10 /* m[12], ____ */
MOVOU 0(DX), X11 // X11 = m[0]+ m[1]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU X15, X10
LONG $0x3a0f4566; WORD $0xd60f; BYTE $0x08 // PALIGNR XMM10, XMM14, 0x8 /* m[11], m[14] */
LONG $0x6d0f4566; BYTE $0xdd // PUNPCKHQDQ XMM11, XMM13 /* m[1], m[9] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X13, X8
LONG $0x6d0f4566; BYTE $0xc7 // PUNPCKHQDQ XMM8, XMM15 /* m[5], m[15] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcc // PUNPCKLQDQ XMM9, XMM12 /* m[8], m[2] */
MOVOU 0(DX), X10 // X10 = m[0]+ m[1]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
LONG $0x6c0f4566; BYTE $0xd5 // PUNPCKLQDQ XMM10, XMM13 /* m[0], m[4] */
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[6], m[10] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 9
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X13, X8
LONG $0x6c0f4566; BYTE $0xc7 // PUNPCKLQDQ XMM8, XMM15 /* m[6], m[14] */
MOVOU X12, X9
LONG $0x3a0f4566; WORD $0xce0f; BYTE $0x08 // PALIGNR XMM9, XMM14, 0x8 /* m[11], m[0] */
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 64(DX), X11 // X11 = m[8]+ m[9]
MOVOU X15, X10
LONG $0x6d0f4566; BYTE $0xd3 // PUNPCKHQDQ XMM10, XMM11 /* m[15], m[9] */
LONG $0x3a0f4566; WORD $0xdd0f; BYTE $0x08 // PALIGNR XMM11, XMM13, 0x8 /* m[3], m[8] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 16(DX), X13 // X13 = m[2]+ m[3]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
MOVOU X15, X9
LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* ___, m[13] */
MOVOU X15, X8
LONG $0x6c0f4566; BYTE $0xc1 // PUNPCKLQDQ XMM8, XMM9 /* m[12], ____ */
MOVOU X14, X9
LONG $0x3a0f4566; WORD $0xcc0f; BYTE $0x08 // PALIGNR XMM9, XMM12, 0x8 /* m[1], m[10] */
MOVOU 32(DX), X12 // X12 = m[4]+ m[5]
MOVOU 48(DX), X15 // X15 = m[6]+ m[7]
MOVOU X15, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* ___, m[7] */
MOVOU X13, X10
LONG $0x6c0f4566; BYTE $0xd3 // PUNPCKLQDQ XMM10, XMM11 /* m[2], ____ */
MOVOU X12, X15
LONG $0x6d0f4566; BYTE $0xfc // PUNPCKHQDQ XMM15, XMM12 /* ___, m[5] */
MOVOU X12, X11
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[4], ____ */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 0
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 48(DX), X13 // X13 = m[6]+ m[7]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 80(DX), X15 // X15 = m[10]+m[11]
MOVOU X15, X8
LONG $0x6c0f4566; BYTE $0xc6 // PUNPCKLQDQ XMM8, XMM14 /* m[10], m[8] */
MOVOU X13, X9
LONG $0x6d0f4566; BYTE $0xcc // PUNPCKHQDQ XMM9, XMM12 /* m[7], m[1] */
MOVOU 16(DX), X10 // X10 = m[2]+ m[3]
MOVOU 32(DX), X14 // X14 = m[4]+ m[5]
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[2], m[4] */
MOVOU X14, X15
LONG $0x6d0f4566; BYTE $0xfe // PUNPCKHQDQ XMM15, XMM14 /* ___, m[5] */
MOVOU X13, X11
LONG $0x6c0f4566; BYTE $0xdf // PUNPCKLQDQ XMM11, XMM15 /* m[6], ____ */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 64(DX), X13 // X13 = m[8]+ m[9]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X15, X8
LONG $0x6d0f4566; BYTE $0xc5 // PUNPCKHQDQ XMM8, XMM13 /* m[15], m[9] */
MOVOU X12, X9
LONG $0x6d0f4566; BYTE $0xce // PUNPCKHQDQ XMM9, XMM14 /* m[3], m[13] */
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU X15, X10
LONG $0x3a0f4566; WORD $0xd50f; BYTE $0x08 // PALIGNR XMM10, XMM13, 0x8 /* m[11], m[14] */
MOVOU X14, X11
LONG $0x6c0f4566; BYTE $0xdc // PUNPCKLQDQ XMM11, XMM12 /* m[12], m[0] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 1
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+m[1]
MOVOU 16(DX), X13 // X13 = m[2]+m[3]
MOVOU 32(DX), X14 // X14 = m[4]+m[5]
MOVOU 48(DX), X15 // X15 = m[6]+m[7]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[0], m[2] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[4], m[6] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[1], m[3] */
MOVOU X14, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[5], m[7] */
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 64(DX), X12 // X12 = m[8]+ m[9]
MOVOU 80(DX), X13 // X13 = m[10]+m[11]
MOVOU 96(DX), X14 // X14 = m[12]+m[13]
MOVOU 112(DX), X15 // X15 = m[14]+m[15]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[8],m[10] */
MOVOU X14, X9
LONG $0x6c0f4566; BYTE $0xcf // PUNPCKLQDQ XMM9, XMM15 /* m[12],m[14] */
MOVOU X12, X10
LONG $0x6d0f4566; BYTE $0xd5 // PUNPCKHQDQ XMM10, XMM13 /* m[9],m[11] */
MOVOU X14, X11
LONG $0x6d0f4566; BYTE $0xdf // PUNPCKHQDQ XMM11, XMM15 /* m[13],m[15] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
///////////////////////////////////////////////////////////////////////////
// R O U N D 1 2
///////////////////////////////////////////////////////////////////////////
// LOAD_MSG_ ##r ##_1 / ##_2(b0, b1); (X12 is temp register)
MOVOU 112(DX), X12 // X12 = m[14]+m[15]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 64(DX), X14 // X14 = m[8]+ m[9]
MOVOU 96(DX), X15 // X15 = m[12]+m[13]
MOVOU X12, X8
LONG $0x6c0f4566; BYTE $0xc5 // PUNPCKLQDQ XMM8, XMM13 /* m[14], m[4] */
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xcf // PUNPCKHQDQ XMM9, XMM15 /* m[9], m[13] */
MOVOU 80(DX), X10 // X10 = m[10]+m[11]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
LONG $0x6c0f4566; BYTE $0xd6 // PUNPCKLQDQ XMM10, XMM14 /* m[10], m[8] */
LONG $0x3a0f4566; WORD $0xdc0f; BYTE $0x08 // PALIGNR XMM11, XMM12, 0x8 /* m[15], m[6] */; ; ; ; ;
LOAD_SHUFFLE
G1
G2
DIAGONALIZE
// LOAD_MSG_ ##r ##_3 / ##_4(b0, b1); (X12 is temp register)
MOVOU 0(DX), X12 // X12 = m[0]+ m[1]
MOVOU 32(DX), X13 // X13 = m[4]+ m[5]
MOVOU 80(DX), X14 // X14 = m[10]+m[11]
MOVOU X12, X8
LONG $0x3a0f4566; WORD $0xc40f; BYTE $0x08 // PALIGNR XMM8, XMM12, 0x8 /* m[1], m[0] */
MOVOU X14, X9
LONG $0x6d0f4566; BYTE $0xcd // PUNPCKHQDQ XMM9, XMM13 /* m[11], m[5] */
MOVOU 16(DX), X12 // X12 = m[2]+ m[3]
MOVOU 48(DX), X11 // X11 = m[6]+ m[7]
MOVOU 96(DX), X10 // X10 = m[12]+m[13]
LONG $0x6c0f4566; BYTE $0xd4 // PUNPCKLQDQ XMM10, XMM12 /* m[12], m[2] */
LONG $0x6d0f4566; BYTE $0xdc // PUNPCKHQDQ XMM11, XMM12 /* m[7], m[3] */
LOAD_SHUFFLE
G1
G2
UNDIAGONALIZE
// Reload digest (most current value store in &out)
MOVQ out+144(FP), SI // SI: &in
MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */
MOVOU 16(SI), X13 // X13 = in[2]+in[3] /* row1h = LOAD( &S->h[2] ); */
MOVOU 32(SI), X14 // X14 = in[4]+in[5] /* row2l = LOAD( &S->h[4] ); */
MOVOU 48(SI), X15 // X15 = in[6]+in[7] /* row2h = LOAD( &S->h[6] ); */
// Final computations and prepare for storing
PXOR X4, X0 // X0 = X0 ^ X4 /* row1l = _mm_xor_si128( row3l, row1l ); */
PXOR X5, X1 // X1 = X1 ^ X5 /* row1h = _mm_xor_si128( row3h, row1h ); */
PXOR X12, X0 // X0 = X0 ^ X12 /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */
PXOR X13, X1 // X1 = X1 ^ X13 /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */
PXOR X6, X2 // X2 = X2 ^ X6 /* row2l = _mm_xor_si128( row4l, row2l ); */
PXOR X7, X3 // X3 = X3 ^ X7 /* row2h = _mm_xor_si128( row4h, row2h ); */
PXOR X14, X2 // X2 = X2 ^ X14 /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */
PXOR X15, X3 // X3 = X3 ^ X15 /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */
// Store digest into &out
MOVQ out+144(FP), SI // SI: &out
MOVOU X0, 0(SI) // out[0]+out[1] = X0
MOVOU X1, 16(SI) // out[2]+out[3] = X1
MOVOU X2, 32(SI) // out[4]+out[5] = X2
MOVOU X3, 48(SI) // out[6]+out[7] = X3
// Increment message pointer and check if there's more to do
ADDQ $128, DX // message += 128
SUBQ $1, R8
JNZ loop
complete:
RET

View file

@ -1,30 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
func compress(d *digest, p []uint8) {
// Verifies if AVX2 or AVX is available, use optimized code path.
if avx2 {
compressAVX2(d, p)
} else if avx {
compressAVX(d, p)
} else if ssse3 {
compressSSE(d, p)
} else {
compressGeneric(d, p)
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,23 +0,0 @@
//+build !amd64 noasm appengine
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package blake2b
func compress(d *digest, p []uint8) {
compressGeneric(d, p)
}

View file

@ -1,60 +0,0 @@
// +build 386,!gccgo amd64,!gccgo
// Copyright 2016 Frank Wessels <fwessels@xs4all.nl>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package blake2b
func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
func xgetbv(index uint32) (eax, edx uint32)
// True when SIMD instructions are available.
var avx2 = haveAVX2()
var avx = haveAVX()
var ssse3 = haveSSSE3()
// haveAVX returns true when there is AVX support
func haveAVX() bool {
_, _, c, _ := cpuid(1)
// Check XGETBV, OXSAVE and AVX bits
if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
// Check for OS support
eax, _ := xgetbv(0)
return (eax & 0x6) == 0x6
}
return false
}
// haveAVX2 returns true when there is AVX2 support
func haveAVX2() bool {
mfi, _, _, _ := cpuid(0)
// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
if mfi >= 7 && haveAVX() {
_, ebx, _, _ := cpuidex(7, 0)
return (ebx & 0x00000020) != 0
}
return false
}
// haveSSSE3 returns true when there is SSSE3 support
func haveSSSE3() bool {
_, _, c, _ := cpuid(1)
return (c & 0x00000200) != 0
}

View file

@ -1,33 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo
// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuid(SB), 7, $0
XORL CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET
// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·xgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET

View file

@ -1,34 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build amd64,!gccgo
// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuid(SB), 7, $0
XORQ CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·cpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·xgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET

188
vendor/golang.org/x/crypto/blake2b/blake2b.go generated vendored Normal file
View file

@ -0,0 +1,188 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package blake2b implements the BLAKE2b hash algorithm as
// defined in RFC 7693.
package blake2b
import (
"encoding/binary"
"errors"
"hash"
)
const (
// The blocksize of BLAKE2b in bytes.
BlockSize = 128
// The hash size of BLAKE2b-512 in bytes.
Size = 64
// The hash size of BLAKE2b-384 in bytes.
Size384 = 48
// The hash size of BLAKE2b-256 in bytes.
Size256 = 32
)
var errKeySize = errors.New("blake2b: invalid key size")
var iv = [8]uint64{
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
}
// Sum512 returns the BLAKE2b-512 checksum of the data.
func Sum512(data []byte) [Size]byte {
var sum [Size]byte
checkSum(&sum, Size, data)
return sum
}
// Sum384 returns the BLAKE2b-384 checksum of the data.
func Sum384(data []byte) [Size384]byte {
var sum [Size]byte
var sum384 [Size384]byte
checkSum(&sum, Size384, data)
copy(sum384[:], sum[:Size384])
return sum384
}
// Sum256 returns the BLAKE2b-256 checksum of the data.
func Sum256(data []byte) [Size256]byte {
var sum [Size]byte
var sum256 [Size256]byte
checkSum(&sum, Size256, data)
copy(sum256[:], sum[:Size256])
return sum256
}
// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) }
// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) }
// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil
// key turns the hash into a MAC. The key must between zero and 64 bytes long.
func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) }
func newDigest(hashSize int, key []byte) (*digest, error) {
if len(key) > Size {
return nil, errKeySize
}
d := &digest{
size: hashSize,
keyLen: len(key),
}
copy(d.key[:], key)
d.Reset()
return d, nil
}
func checkSum(sum *[Size]byte, hashSize int, data []byte) {
h := iv
h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24)
var c [2]uint64
if length := len(data); length > BlockSize {
n := length &^ (BlockSize - 1)
if length == n {
n -= BlockSize
}
hashBlocks(&h, &c, 0, data[:n])
data = data[n:]
}
var block [BlockSize]byte
offset := copy(block[:], data)
remaining := uint64(BlockSize - offset)
if c[0] < remaining {
c[1]--
}
c[0] -= remaining
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
for i, v := range h[:(hashSize+7)/8] {
binary.LittleEndian.PutUint64(sum[8*i:], v)
}
}
type digest struct {
h [8]uint64
c [2]uint64
size int
block [BlockSize]byte
offset int
key [BlockSize]byte
keyLen int
}
func (d *digest) BlockSize() int { return BlockSize }
func (d *digest) Size() int { return d.size }
func (d *digest) Reset() {
d.h = iv
d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24)
d.offset, d.c[0], d.c[1] = 0, 0, 0
if d.keyLen > 0 {
d.block = d.key
d.offset = BlockSize
}
}
func (d *digest) Write(p []byte) (n int, err error) {
n = len(p)
if d.offset > 0 {
remaining := BlockSize - d.offset
if n <= remaining {
d.offset += copy(d.block[d.offset:], p)
return
}
copy(d.block[d.offset:], p[:remaining])
hashBlocks(&d.h, &d.c, 0, d.block[:])
d.offset = 0
p = p[remaining:]
}
if length := len(p); length > BlockSize {
nn := length &^ (BlockSize - 1)
if length == nn {
nn -= BlockSize
}
hashBlocks(&d.h, &d.c, 0, p[:nn])
p = p[nn:]
}
if len(p) > 0 {
d.offset += copy(d.block[:], p)
}
return
}
func (d *digest) Sum(b []byte) []byte {
var block [BlockSize]byte
copy(block[:], d.block[:d.offset])
remaining := uint64(BlockSize - d.offset)
c := d.c
if c[0] < remaining {
c[1]--
}
c[0] -= remaining
h := d.h
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
var sum [Size]byte
for i, v := range h[:(d.size+7)/8] {
binary.LittleEndian.PutUint64(sum[8*i:], v)
}
return append(b, sum[:d.size]...)
}

View file

@ -0,0 +1,41 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine
package blake2b
var useAVX2 = supportAVX2()
var useAVX = supportAVX()
var useSSE4 = supportSSE4()
//go:noescape
func supportSSE4() bool
//go:noescape
func supportAVX() bool
//go:noescape
func supportAVX2() bool
//go:noescape
func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//go:noescape
func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//go:noescape
func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
if useAVX2 {
hashBlocksAVX2(h, c, flag, blocks)
} else if useAVX {
hashBlocksAVX(h, c, flag, blocks)
} else if useSSE4 {
hashBlocksSSE4(h, c, flag, blocks)
} else {
hashBlocksGeneric(h, c, flag, blocks)
}
}

502
vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s generated vendored Normal file
View file

@ -0,0 +1,502 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine
#include "textflag.h"
DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b
DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b
DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179
GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403
DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32
DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302
DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32
DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16
DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16
// unfortunately the BYTE representation of VPERMQ must be used
#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \
VPADDQ m0, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFD $-79, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPSHUFB c40, Y1, Y1; \
VPADDQ m1, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFB c48, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPADDQ Y1, Y1, t; \
VPSRLQ $63, Y1, Y1; \
VPXOR t, Y1, Y1; \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 \ // VPERMQ 0x39, Y1, Y1
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e \ // VPERMQ 0x4e, Y2, Y2
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 \ // VPERMQ 0x93, Y3, Y3
VPADDQ m2, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFD $-79, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPSHUFB c40, Y1, Y1; \
VPADDQ m3, Y0, Y0; \
VPADDQ Y1, Y0, Y0; \
VPXOR Y0, Y3, Y3; \
VPSHUFB c48, Y3, Y3; \
VPADDQ Y3, Y2, Y2; \
VPXOR Y2, Y1, Y1; \
VPADDQ Y1, Y1, t; \
VPSRLQ $63, Y1, Y1; \
VPXOR t, Y1, Y1; \
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 \ // VPERMQ 0x39, Y3, Y3
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e \ // VPERMQ 0x4e, Y2, Y2
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 \ // VPERMQ 0x93, Y1, Y1
// load msg into Y12, Y13, Y14, Y15
#define LOAD_MSG_AVX2(src, i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15) \
MOVQ i0*8(src), X12; \
PINSRQ $1, i1*8(src), X12; \
MOVQ i2*8(src), X11; \
PINSRQ $1, i3*8(src), X11; \
VINSERTI128 $1, X11, Y12, Y12; \
MOVQ i4*8(src), X13; \
PINSRQ $1, i5*8(src), X13; \
MOVQ i6*8(src), X11; \
PINSRQ $1, i7*8(src), X11; \
VINSERTI128 $1, X11, Y13, Y13; \
MOVQ i8*8(src), X14; \
PINSRQ $1, i9*8(src), X14; \
MOVQ i10*8(src), X11; \
PINSRQ $1, i11*8(src), X11; \
VINSERTI128 $1, X11, Y14, Y14; \
MOVQ i12*8(src), X15; \
PINSRQ $1, i13*8(src), X15; \
MOVQ i14*8(src), X11; \
PINSRQ $1, i15*8(src), X11; \
VINSERTI128 $1, X11, Y15, Y15
// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, DX
MOVQ SP, R9
ADDQ $31, R9
ANDQ $~31, R9
MOVQ R9, SP
MOVQ CX, 16(SP)
XORQ CX, CX
MOVQ CX, 24(SP)
VMOVDQU ·AVX2_c40<>(SB), Y4
VMOVDQU ·AVX2_c48<>(SB), Y5
VMOVDQU 0(AX), Y8
VMOVDQU 32(AX), Y9
VMOVDQU ·AVX2_iv0<>(SB), Y6
VMOVDQU ·AVX2_iv1<>(SB), Y7
MOVQ 0(BX), R8
MOVQ 8(BX), R9
MOVQ R9, 8(SP)
loop:
ADDQ $128, R8
MOVQ R8, 0(SP)
CMPQ R8, $128
JGE noinc
INCQ R9
MOVQ R9, 8(SP)
noinc:
VMOVDQA Y8, Y0
VMOVDQA Y9, Y1
VMOVDQA Y6, Y2
VPXOR 0(SP), Y7, Y3
LOAD_MSG_AVX2(SI, 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15)
VMOVDQA Y12, 32(SP)
VMOVDQA Y13, 64(SP)
VMOVDQA Y14, 96(SP)
VMOVDQA Y15, 128(SP)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3)
VMOVDQA Y12, 160(SP)
VMOVDQA Y13, 192(SP)
VMOVDQA Y14, 224(SP)
VMOVDQA Y15, 256(SP)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
LOAD_MSG_AVX2(SI, 10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0)
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
ROUND_AVX2(32(SP), 64(SP), 96(SP), 128(SP), Y10, Y4, Y5)
ROUND_AVX2(160(SP), 192(SP), 224(SP), 256(SP), Y10, Y4, Y5)
VPXOR Y0, Y8, Y8
VPXOR Y1, Y9, Y9
VPXOR Y2, Y8, Y8
VPXOR Y3, Y9, Y9
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
VMOVDQU Y8, 0(AX)
VMOVDQU Y9, 32(AX)
MOVQ DX, SP
RET
// unfortunately the BYTE representation of VPUNPCKLQDQ and VPUNPCKHQDQ must be used
#define VPUNPCKLQDQ_X8_X8_X10 BYTE $0xC4; BYTE $0x41; BYTE $0x39; BYTE $0x6C; BYTE $0xD0
#define VPUNPCKHQDQ_X7_X10_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF2
#define VPUNPCKLQDQ_X7_X7_X10 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xD7
#define VPUNPCKHQDQ_X8_X10_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x39; BYTE $0x6D; BYTE $0xFA
#define VPUNPCKLQDQ_X3_X3_X10 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xD3
#define VPUNPCKHQDQ_X2_X10_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD2
#define VPUNPCKLQDQ_X9_X9_X10 BYTE $0xC4; BYTE $0x41; BYTE $0x31; BYTE $0x6C; BYTE $0xD1
#define VPUNPCKHQDQ_X3_X10_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDA
#define VPUNPCKLQDQ_X2_X2_X10 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xD2
#define VPUNPCKHQDQ_X3_X10_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD2
#define VPUNPCKHQDQ_X8_X10_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x39; BYTE $0x6D; BYTE $0xDA
#define VPUNPCKHQDQ_X6_X10_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF2
#define VPUNPCKHQDQ_X7_X10_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFA
// shuffle X2 and X6 using the temp registers X8, X9, X10
#define SHUFFLE_AVX() \
VMOVDQA X4, X9; \
VMOVDQA X5, X4; \
VMOVDQA X9, X5; \
VMOVDQA X6, X8; \
VPUNPCKLQDQ_X8_X8_X10; \
VPUNPCKHQDQ_X7_X10_X6; \
VPUNPCKLQDQ_X7_X7_X10; \
VPUNPCKHQDQ_X8_X10_X7; \
VPUNPCKLQDQ_X3_X3_X10; \
VMOVDQA X2, X9; \
VPUNPCKHQDQ_X2_X10_X2; \
VPUNPCKLQDQ_X9_X9_X10; \
VPUNPCKHQDQ_X3_X10_X3; \
// inverse shuffle X2 and X6 using the temp registers X8, X9, X10
#define SHUFFLE_AVX_INV() \
VMOVDQA X4, X9; \
VMOVDQA X5, X4; \
VMOVDQA X9, X5; \
VMOVDQA X2, X8; \
VPUNPCKLQDQ_X2_X2_X10; \
VPUNPCKHQDQ_X3_X10_X2; \
VPUNPCKLQDQ_X3_X3_X10; \
VPUNPCKHQDQ_X8_X10_X3; \
VPUNPCKLQDQ_X7_X7_X10; \
VMOVDQA X6, X9; \
VPUNPCKHQDQ_X6_X10_X6; \
VPUNPCKLQDQ_X9_X9_X10; \
VPUNPCKHQDQ_X7_X10_X7; \
#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
VPADDQ m0, v0, v0; \
VPADDQ v2, v0, v0; \
VPADDQ m1, v1, v1; \
VPADDQ v3, v1, v1; \
VPXOR v0, v6, v6; \
VPXOR v1, v7, v7; \
VPSHUFD $-79, v6, v6; \
VPSHUFD $-79, v7, v7; \
VPADDQ v6, v4, v4; \
VPADDQ v7, v5, v5; \
VPXOR v4, v2, v2; \
VPXOR v5, v3, v3; \
VPSHUFB c40, v2, v2; \
VPSHUFB c40, v3, v3; \
VPADDQ m2, v0, v0; \
VPADDQ v2, v0, v0; \
VPADDQ m3, v1, v1; \
VPADDQ v3, v1, v1; \
VPXOR v0, v6, v6; \
VPXOR v1, v7, v7; \
VPSHUFB c48, v6, v6; \
VPSHUFB c48, v7, v7; \
VPADDQ v6, v4, v4; \
VPADDQ v7, v5, v5; \
VPXOR v4, v2, v2; \
VPXOR v5, v3, v3; \
VPADDQ v2, v2, t0; \
VPSRLQ $63, v2, v2; \
VPXOR t0, v2, v2; \
VPADDQ v3, v3, t0; \
VPSRLQ $63, v3, v3; \
VPXOR t0, v3, v3
// unfortunately the BYTE representation of VPINSRQ must be used
#define VPINSRQ_1_R10_X8_X8 BYTE $0xC4; BYTE $0x43; BYTE $0xB9; BYTE $0x22; BYTE $0xC2; BYTE $0x01
#define VPINSRQ_1_R11_X9_X9 BYTE $0xC4; BYTE $0x43; BYTE $0xB1; BYTE $0x22; BYTE $0xCB; BYTE $0x01
#define VPINSRQ_1_R12_X10_X10 BYTE $0xC4; BYTE $0x43; BYTE $0xA9; BYTE $0x22; BYTE $0xD4; BYTE $0x01
#define VPINSRQ_1_R13_X11_X11 BYTE $0xC4; BYTE $0x43; BYTE $0xA1; BYTE $0x22; BYTE $0xDD; BYTE $0x01
#define VPINSRQ_1_R9_X8_X8 BYTE $0xC4; BYTE $0x43; BYTE $0xB9; BYTE $0x22; BYTE $0xC1; BYTE $0x01
// load src into X8, X9, X10 and X11 using R10, R11, R12 and R13 for temp registers
#define LOAD_MSG_AVX(src, i0, i1, i2, i3, i4, i5, i6, i7) \
MOVQ i0*8(src), X8; \
MOVQ i1*8(src), R10; \
MOVQ i2*8(src), X9; \
MOVQ i3*8(src), R11; \
MOVQ i4*8(src), X10; \
MOVQ i5*8(src), R12; \
MOVQ i6*8(src), X11; \
MOVQ i7*8(src), R13; \
VPINSRQ_1_R10_X8_X8; \
VPINSRQ_1_R11_X9_X9; \
VPINSRQ_1_R12_X10_X10; \
VPINSRQ_1_R13_X11_X11
// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, BP
MOVQ SP, R9
ADDQ $15, R9
ANDQ $~15, R9
MOVQ R9, SP
MOVOU ·AVX_c40<>(SB), X13
MOVOU ·AVX_c48<>(SB), X14
VMOVDQU ·AVX_iv3<>(SB), X0
VMOVDQA X0, 0(SP)
XORQ CX, 0(SP) // 0(SP) = ·AVX_iv3 ^ (CX || 0)
VMOVDQU 0(AX), X12
VMOVDQU 16(AX), X15
VMOVDQU 32(AX), X2
VMOVDQU 48(AX), X3
MOVQ 0(BX), R8
MOVQ 8(BX), R9
loop:
ADDQ $128, R8
CMPQ R8, $128
JGE noinc
INCQ R9
noinc:
MOVQ R8, X8
VPINSRQ_1_R9_X8_X8
VMOVDQA X12, X0
VMOVDQA X15, X1
VMOVDQU ·AVX_iv0<>(SB), X4
VMOVDQU ·AVX_iv1<>(SB), X5
VMOVDQU ·AVX_iv2<>(SB), X6
VPXOR X8, X6, X6
VMOVDQA 0(SP), X7
LOAD_MSG_AVX(SI, 0, 2, 4, 6, 1, 3, 5, 7)
VMOVDQA X8, 16(SP)
VMOVDQA X9, 32(SP)
VMOVDQA X10, 48(SP)
VMOVDQA X11, 64(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 8, 10, 12, 14, 9, 11, 13, 15)
VMOVDQA X8, 80(SP)
VMOVDQA X9, 96(SP)
VMOVDQA X10, 112(SP)
VMOVDQA X11, 128(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 14, 4, 9, 13, 10, 8, 15, 6)
VMOVDQA X8, 144(SP)
VMOVDQA X9, 160(SP)
VMOVDQA X10, 176(SP)
VMOVDQA X11, 192(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 1, 0, 11, 5, 12, 2, 7, 3)
VMOVDQA X8, 208(SP)
VMOVDQA X9, 224(SP)
VMOVDQA X10, 240(SP)
VMOVDQA X11, 256(SP)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 11, 12, 5, 15, 8, 0, 2, 13)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 10, 3, 7, 9, 14, 6, 1, 4)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 7, 3, 13, 11, 9, 1, 12, 14)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 2, 5, 4, 15, 6, 10, 0, 8)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 9, 5, 2, 10, 0, 7, 4, 15)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 14, 11, 6, 3, 1, 12, 8, 13)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 2, 6, 0, 8, 12, 10, 11, 3)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 4, 7, 15, 1, 13, 5, 14, 9)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 12, 1, 14, 4, 5, 15, 13, 10)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 0, 6, 9, 8, 7, 3, 2, 11)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 13, 7, 12, 3, 11, 14, 1, 9)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 5, 15, 8, 2, 0, 4, 6, 10)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 6, 14, 11, 0, 15, 9, 3, 8)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 12, 13, 1, 10, 2, 7, 4, 5)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
LOAD_MSG_AVX(SI, 10, 8, 7, 1, 2, 4, 6, 5)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX()
LOAD_MSG_AVX(SI, 15, 9, 3, 13, 11, 14, 12, 0)
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_AVX_INV()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14)
SHUFFLE_AVX()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14)
SHUFFLE_AVX_INV()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14)
SHUFFLE_AVX()
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14)
SHUFFLE_AVX_INV()
VMOVDQU 32(AX), X10
VMOVDQU 48(AX), X11
VPXOR X0, X12, X12
VPXOR X1, X15, X15
VPXOR X2, X10, X10
VPXOR X3, X11, X11
VPXOR X4, X12, X12
VPXOR X5, X15, X15
VPXOR X6, X10, X2
VPXOR X7, X11, X3
VMOVDQU X2, 32(AX)
VMOVDQU X3, 48(AX)
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
VMOVDQU X12, 0(AX)
VMOVDQU X15, 16(AX)
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
VZEROUPPER
MOVQ BP, SP
RET
// func supportAVX2() bool
TEXT ·supportAVX2(SB), 4, $0-1
MOVQ runtime·support_avx2(SB), AX
MOVB AX, ret+0(FP)
RET
// func supportAVX() bool
TEXT ·supportAVX(SB), 4, $0-1
MOVQ runtime·support_avx(SB), AX
MOVB AX, ret+0(FP)
RET

25
vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go generated vendored Normal file
View file

@ -0,0 +1,25 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.7,amd64,!gccgo,!appengine
package blake2b
var useAVX2 = false
var useAVX = false
var useSSE4 = supportSSE4()
//go:noescape
func supportSSE4() bool
//go:noescape
func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
if useSSE4 {
hashBlocksSSE4(h, c, flag, blocks)
} else {
hashBlocksGeneric(h, c, flag, blocks)
}
}

290
vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s generated vendored Normal file
View file

@ -0,0 +1,290 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!gccgo,!appengine
#include "textflag.h"
DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
MOVO v4, t1; \
MOVO v5, v4; \
MOVO t1, v5; \
MOVO v6, t1; \
PUNPCKLQDQ v6, t2; \
PUNPCKHQDQ v7, v6; \
PUNPCKHQDQ t2, v6; \
PUNPCKLQDQ v7, t2; \
MOVO t1, v7; \
MOVO v2, t1; \
PUNPCKHQDQ t2, v7; \
PUNPCKLQDQ v3, t2; \
PUNPCKHQDQ t2, v2; \
PUNPCKLQDQ t1, t2; \
PUNPCKHQDQ t2, v3
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
MOVO v4, t1; \
MOVO v5, v4; \
MOVO t1, v5; \
MOVO v2, t1; \
PUNPCKLQDQ v2, t2; \
PUNPCKHQDQ v3, v2; \
PUNPCKHQDQ t2, v2; \
PUNPCKLQDQ v3, t2; \
MOVO t1, v3; \
MOVO v6, t1; \
PUNPCKHQDQ t2, v3; \
PUNPCKLQDQ v7, t2; \
PUNPCKHQDQ t2, v6; \
PUNPCKLQDQ t1, t2; \
PUNPCKHQDQ t2, v7
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
PADDQ m0, v0; \
PADDQ m1, v1; \
PADDQ v2, v0; \
PADDQ v3, v1; \
PXOR v0, v6; \
PXOR v1, v7; \
PSHUFD $0xB1, v6, v6; \
PSHUFD $0xB1, v7, v7; \
PADDQ v6, v4; \
PADDQ v7, v5; \
PXOR v4, v2; \
PXOR v5, v3; \
PSHUFB c40, v2; \
PSHUFB c40, v3; \
PADDQ m2, v0; \
PADDQ m3, v1; \
PADDQ v2, v0; \
PADDQ v3, v1; \
PXOR v0, v6; \
PXOR v1, v7; \
PSHUFB c48, v6; \
PSHUFB c48, v7; \
PADDQ v6, v4; \
PADDQ v7, v5; \
PXOR v4, v2; \
PXOR v5, v3; \
MOVOU v2, t0; \
PADDQ v2, t0; \
PSRLQ $63, v2; \
PXOR t0, v2; \
MOVOU v3, t0; \
PADDQ v3, t0; \
PSRLQ $63, v3; \
PXOR t0, v3
#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
MOVQ i0*8(src), m0; \
PINSRQ $1, i1*8(src), m0; \
MOVQ i2*8(src), m1; \
PINSRQ $1, i3*8(src), m1; \
MOVQ i4*8(src), m2; \
PINSRQ $1, i5*8(src), m2; \
MOVQ i6*8(src), m3; \
PINSRQ $1, i7*8(src), m3
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
MOVQ blocks_base+24(FP), SI
MOVQ blocks_len+32(FP), DI
MOVQ SP, BP
MOVQ SP, R9
ADDQ $15, R9
ANDQ $~15, R9
MOVQ R9, SP
MOVOU ·iv3<>(SB), X0
MOVO X0, 0(SP)
XORQ CX, 0(SP) // 0(SP) = ·iv3 ^ (CX || 0)
MOVOU ·c40<>(SB), X13
MOVOU ·c48<>(SB), X14
MOVOU 0(AX), X12
MOVOU 16(AX), X15
MOVQ 0(BX), R8
MOVQ 8(BX), R9
loop:
ADDQ $128, R8
CMPQ R8, $128
JGE noinc
INCQ R9
noinc:
MOVQ R8, X8
PINSRQ $1, R9, X8
MOVO X12, X0
MOVO X15, X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU ·iv0<>(SB), X4
MOVOU ·iv1<>(SB), X5
MOVOU ·iv2<>(SB), X6
PXOR X8, X6
MOVO 0(SP), X7
LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
MOVO X8, 16(SP)
MOVO X9, 32(SP)
MOVO X10, 48(SP)
MOVO X11, 64(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
MOVO X8, 80(SP)
MOVO X9, 96(SP)
MOVO X10, 112(SP)
MOVO X11, 128(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
MOVO X8, 144(SP)
MOVO X9, 160(SP)
MOVO X10, 176(SP)
MOVO X11, 192(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
MOVO X8, 208(SP)
MOVO X9, 224(SP)
MOVO X10, 240(SP)
MOVO X11, 256(SP)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14)
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14)
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
MOVOU 32(AX), X10
MOVOU 48(AX), X11
PXOR X0, X12
PXOR X1, X15
PXOR X2, X10
PXOR X3, X11
PXOR X4, X12
PXOR X5, X15
PXOR X6, X10
PXOR X7, X11
MOVOU X10, 32(AX)
MOVOU X11, 48(AX)
LEAQ 128(SI), SI
SUBQ $128, DI
JNE loop
MOVOU X12, 0(AX)
MOVOU X15, 16(AX)
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
MOVQ BP, SP
RET
// func supportSSE4() bool
TEXT ·supportSSE4(SB), 4, $0-1
MOVL $1, AX
CPUID
SHRL $19, CX // Bit 19 indicates SSE4 support
ANDL $1, CX // CX != 0 if support SSE4
MOVB CX, ret+0(FP)
RET

179
vendor/golang.org/x/crypto/blake2b/blake2b_generic.go generated vendored Normal file
View file

@ -0,0 +1,179 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blake2b
import "encoding/binary"
// the precomputed values for BLAKE2b
// there are 12 16-byte arrays - one for each round
// the entries are calculated from the sigma constants.
var precomputed = [12][16]byte{
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second
}
func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
var m [16]uint64
c0, c1 := c[0], c[1]
for i := 0; i < len(blocks); {
c0 += BlockSize
if c0 < BlockSize {
c1++
}
v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]
v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]
v12 ^= c0
v13 ^= c1
v14 ^= flag
for j := range m {
m[j] = binary.LittleEndian.Uint64(blocks[i:])
i += 8
}
for j := range precomputed {
s := &(precomputed[j])
v0 += m[s[0]]
v0 += v4
v12 ^= v0
v12 = v12<<(64-32) | v12>>32
v8 += v12
v4 ^= v8
v4 = v4<<(64-24) | v4>>24
v1 += m[s[1]]
v1 += v5
v13 ^= v1
v13 = v13<<(64-32) | v13>>32
v9 += v13
v5 ^= v9
v5 = v5<<(64-24) | v5>>24
v2 += m[s[2]]
v2 += v6
v14 ^= v2
v14 = v14<<(64-32) | v14>>32
v10 += v14
v6 ^= v10
v6 = v6<<(64-24) | v6>>24
v3 += m[s[3]]
v3 += v7
v15 ^= v3
v15 = v15<<(64-32) | v15>>32
v11 += v15
v7 ^= v11
v7 = v7<<(64-24) | v7>>24
v0 += m[s[4]]
v0 += v4
v12 ^= v0
v12 = v12<<(64-16) | v12>>16
v8 += v12
v4 ^= v8
v4 = v4<<(64-63) | v4>>63
v1 += m[s[5]]
v1 += v5
v13 ^= v1
v13 = v13<<(64-16) | v13>>16
v9 += v13
v5 ^= v9
v5 = v5<<(64-63) | v5>>63
v2 += m[s[6]]
v2 += v6
v14 ^= v2
v14 = v14<<(64-16) | v14>>16
v10 += v14
v6 ^= v10
v6 = v6<<(64-63) | v6>>63
v3 += m[s[7]]
v3 += v7
v15 ^= v3
v15 = v15<<(64-16) | v15>>16
v11 += v15
v7 ^= v11
v7 = v7<<(64-63) | v7>>63
v0 += m[s[8]]
v0 += v5
v15 ^= v0
v15 = v15<<(64-32) | v15>>32
v10 += v15
v5 ^= v10
v5 = v5<<(64-24) | v5>>24
v1 += m[s[9]]
v1 += v6
v12 ^= v1
v12 = v12<<(64-32) | v12>>32
v11 += v12
v6 ^= v11
v6 = v6<<(64-24) | v6>>24
v2 += m[s[10]]
v2 += v7
v13 ^= v2
v13 = v13<<(64-32) | v13>>32
v8 += v13
v7 ^= v8
v7 = v7<<(64-24) | v7>>24
v3 += m[s[11]]
v3 += v4
v14 ^= v3
v14 = v14<<(64-32) | v14>>32
v9 += v14
v4 ^= v9
v4 = v4<<(64-24) | v4>>24
v0 += m[s[12]]
v0 += v5
v15 ^= v0
v15 = v15<<(64-16) | v15>>16
v10 += v15
v5 ^= v10
v5 = v5<<(64-63) | v5>>63
v1 += m[s[13]]
v1 += v6
v12 ^= v1
v12 = v12<<(64-16) | v12>>16
v11 += v12
v6 ^= v11
v6 = v6<<(64-63) | v6>>63
v2 += m[s[14]]
v2 += v7
v13 ^= v2
v13 = v13<<(64-16) | v13>>16
v8 += v13
v7 ^= v8
v7 = v7<<(64-63) | v7>>63
v3 += m[s[15]]
v3 += v4
v14 ^= v3
v14 = v14<<(64-16) | v14>>16
v9 += v14
v4 ^= v9
v4 = v4<<(64-63) | v4>>63
}
h[0] ^= v0 ^ v8
h[1] ^= v1 ^ v9
h[2] ^= v2 ^ v10
h[3] ^= v3 ^ v11
h[4] ^= v4 ^ v12
h[5] ^= v5 ^ v13
h[6] ^= v6 ^ v14
h[7] ^= v7 ^ v15
}
c[0], c[1] = c0, c1
}

14
vendor/golang.org/x/crypto/blake2b/blake2b_ref.go generated vendored Normal file
View file

@ -0,0 +1,14 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 appengine gccgo
package blake2b
var useAVX2 = false
var useSSE4 = false
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
hashBlocksGeneric(h, c, flag, blocks)
}

448
vendor/golang.org/x/crypto/blake2b/blake2b_test.go generated vendored Normal file
View file

@ -0,0 +1,448 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blake2b
import (
"bytes"
"encoding/hex"
"fmt"
"hash"
"testing"
)
func fromHex(s string) []byte {
b, err := hex.DecodeString(s)
if err != nil {
panic(err)
}
return b
}
func TestHashes(t *testing.T) {
defer func(sse4, avx, avx2 bool) {
useSSE4, useAVX, useAVX2 = sse4, useAVX, avx2
}(useSSE4, useAVX, useAVX2)
if useAVX2 {
t.Log("AVX2 version")
testHashes(t)
useAVX2 = false
}
if useAVX {
t.Log("AVX version")
testHashes(t)
useAVX = false
}
if useSSE4 {
t.Log("SSE4 version")
testHashes(t)
useSSE4 = false
}
t.Log("generic version")
testHashes(t)
}
func testHashes(t *testing.T) {
key, _ := hex.DecodeString("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f")
input := make([]byte, 255)
for i := range input {
input[i] = byte(i)
}
for i, expectedHex := range hashes {
h, err := New512(key)
if err != nil {
t.Fatalf("#%d: error from New512: %v", i, err)
}
h.Write(input[:i])
sum := h.Sum(nil)
if gotHex := fmt.Sprintf("%x", sum); gotHex != expectedHex {
t.Fatalf("#%d (single write): got %s, wanted %s", i, gotHex, expectedHex)
}
h.Reset()
for j := 0; j < i; j++ {
h.Write(input[j : j+1])
}
sum = h.Sum(sum[:0])
if gotHex := fmt.Sprintf("%x", sum); gotHex != expectedHex {
t.Fatalf("#%d (byte-by-byte): got %s, wanted %s", i, gotHex, expectedHex)
}
}
}
func generateSequence(out []byte, seed uint32) {
a := 0xDEAD4BAD * seed // prime
b := uint32(1)
for i := range out { // fill the buf
a, b = b, a+b
out[i] = byte(b >> 24)
}
}
func computeMAC(msg []byte, hashSize int, key []byte) (sum []byte) {
var h hash.Hash
switch hashSize {
case Size:
h, _ = New512(key)
case Size384:
h, _ = New384(key)
case Size256:
h, _ = New256(key)
case 20:
h, _ = newDigest(20, key)
default:
panic("unexpected hashSize")
}
h.Write(msg)
return h.Sum(sum)
}
func computeHash(msg []byte, hashSize int) (sum []byte) {
switch hashSize {
case Size:
hash := Sum512(msg)
return hash[:]
case Size384:
hash := Sum384(msg)
return hash[:]
case Size256:
hash := Sum256(msg)
return hash[:]
case 20:
var hash [64]byte
checkSum(&hash, 20, msg)
return hash[:20]
default:
panic("unexpected hashSize")
}
}
// Test function from RFC 7693.
func TestSelfTest(t *testing.T) {
hashLens := [4]int{20, 32, 48, 64}
msgLens := [6]int{0, 3, 128, 129, 255, 1024}
msg := make([]byte, 1024)
key := make([]byte, 64)
h, _ := New256(nil)
for _, hashSize := range hashLens {
for _, msgLength := range msgLens {
generateSequence(msg[:msgLength], uint32(msgLength)) // unkeyed hash
md := computeHash(msg[:msgLength], hashSize)
h.Write(md)
generateSequence(key[:], uint32(hashSize)) // keyed hash
md = computeMAC(msg[:msgLength], hashSize, key[:hashSize])
h.Write(md)
}
}
sum := h.Sum(nil)
expected := [32]byte{
0xc2, 0x3a, 0x78, 0x00, 0xd9, 0x81, 0x23, 0xbd,
0x10, 0xf5, 0x06, 0xc6, 0x1e, 0x29, 0xda, 0x56,
0x03, 0xd7, 0x63, 0xb8, 0xbb, 0xad, 0x2e, 0x73,
0x7f, 0x5e, 0x76, 0x5a, 0x7b, 0xcc, 0xd4, 0x75,
}
if !bytes.Equal(sum, expected[:]) {
t.Fatalf("got %x, wanted %x", sum, expected)
}
}
// Benchmarks
func benchmarkSum(b *testing.B, size int) {
data := make([]byte, size)
b.SetBytes(int64(size))
b.ResetTimer()
for i := 0; i < b.N; i++ {
Sum512(data)
}
}
func benchmarkWrite(b *testing.B, size int) {
data := make([]byte, size)
h, _ := New512(nil)
b.SetBytes(int64(size))
b.ResetTimer()
for i := 0; i < b.N; i++ {
h.Write(data)
}
}
func BenchmarkWrite128(b *testing.B) { benchmarkWrite(b, 128) }
func BenchmarkWrite1K(b *testing.B) { benchmarkWrite(b, 1024) }
func BenchmarkSum128(b *testing.B) { benchmarkSum(b, 128) }
func BenchmarkSum1K(b *testing.B) { benchmarkSum(b, 1024) }
// These values were taken from https://blake2.net/blake2b-test.txt.
var hashes = []string{
"10ebb67700b1868efb4417987acf4690ae9d972fb7a590c2f02871799aaa4786b5e996e8f0f4eb981fc214b005f42d2ff4233499391653df7aefcbc13fc51568",
"961f6dd1e4dd30f63901690c512e78e4b45e4742ed197c3c5e45c549fd25f2e4187b0bc9fe30492b16b0d0bc4ef9b0f34c7003fac09a5ef1532e69430234cebd",
"da2cfbe2d8409a0f38026113884f84b50156371ae304c4430173d08a99d9fb1b983164a3770706d537f49e0c916d9f32b95cc37a95b99d857436f0232c88a965",
"33d0825dddf7ada99b0e7e307104ad07ca9cfd9692214f1561356315e784f3e5a17e364ae9dbb14cb2036df932b77f4b292761365fb328de7afdc6d8998f5fc1",
"beaa5a3d08f3807143cf621d95cd690514d0b49efff9c91d24b59241ec0eefa5f60196d407048bba8d2146828ebcb0488d8842fd56bb4f6df8e19c4b4daab8ac",
"098084b51fd13deae5f4320de94a688ee07baea2800486689a8636117b46c1f4c1f6af7f74ae7c857600456a58a3af251dc4723a64cc7c0a5ab6d9cac91c20bb",
"6044540d560853eb1c57df0077dd381094781cdb9073e5b1b3d3f6c7829e12066bbaca96d989a690de72ca3133a83652ba284a6d62942b271ffa2620c9e75b1f",
"7a8cfe9b90f75f7ecb3acc053aaed6193112b6f6a4aeeb3f65d3de541942deb9e2228152a3c4bbbe72fc3b12629528cfbb09fe630f0474339f54abf453e2ed52",
"380beaf6ea7cc9365e270ef0e6f3a64fb902acae51dd5512f84259ad2c91f4bc4108db73192a5bbfb0cbcf71e46c3e21aee1c5e860dc96e8eb0b7b8426e6abe9",
"60fe3c4535e1b59d9a61ea8500bfac41a69dffb1ceadd9aca323e9a625b64da5763bad7226da02b9c8c4f1a5de140ac5a6c1124e4f718ce0b28ea47393aa6637",
"4fe181f54ad63a2983feaaf77d1e7235c2beb17fa328b6d9505bda327df19fc37f02c4b6f0368ce23147313a8e5738b5fa2a95b29de1c7f8264eb77b69f585cd",
"f228773ce3f3a42b5f144d63237a72d99693adb8837d0e112a8a0f8ffff2c362857ac49c11ec740d1500749dac9b1f4548108bf3155794dcc9e4082849e2b85b",
"962452a8455cc56c8511317e3b1f3b2c37df75f588e94325fdd77070359cf63a9ae6e930936fdf8e1e08ffca440cfb72c28f06d89a2151d1c46cd5b268ef8563",
"43d44bfa18768c59896bf7ed1765cb2d14af8c260266039099b25a603e4ddc5039d6ef3a91847d1088d401c0c7e847781a8a590d33a3c6cb4df0fab1c2f22355",
"dcffa9d58c2a4ca2cdbb0c7aa4c4c1d45165190089f4e983bb1c2cab4aaeff1fa2b5ee516fecd780540240bf37e56c8bcca7fab980e1e61c9400d8a9a5b14ac6",
"6fbf31b45ab0c0b8dad1c0f5f4061379912dde5aa922099a030b725c73346c524291adef89d2f6fd8dfcda6d07dad811a9314536c2915ed45da34947e83de34e",
"a0c65bddde8adef57282b04b11e7bc8aab105b99231b750c021f4a735cb1bcfab87553bba3abb0c3e64a0b6955285185a0bd35fb8cfde557329bebb1f629ee93",
"f99d815550558e81eca2f96718aed10d86f3f1cfb675cce06b0eff02f617c5a42c5aa760270f2679da2677c5aeb94f1142277f21c7f79f3c4f0cce4ed8ee62b1",
"95391da8fc7b917a2044b3d6f5374e1ca072b41454d572c7356c05fd4bc1e0f40b8bb8b4a9f6bce9be2c4623c399b0dca0dab05cb7281b71a21b0ebcd9e55670",
"04b9cd3d20d221c09ac86913d3dc63041989a9a1e694f1e639a3ba7e451840f750c2fc191d56ad61f2e7936bc0ac8e094b60caeed878c18799045402d61ceaf9",
"ec0e0ef707e4ed6c0c66f9e089e4954b058030d2dd86398fe84059631f9ee591d9d77375355149178c0cf8f8e7c49ed2a5e4f95488a2247067c208510fadc44c",
"9a37cce273b79c09913677510eaf7688e89b3314d3532fd2764c39de022a2945b5710d13517af8ddc0316624e73bec1ce67df15228302036f330ab0cb4d218dd",
"4cf9bb8fb3d4de8b38b2f262d3c40f46dfe747e8fc0a414c193d9fcf753106ce47a18f172f12e8a2f1c26726545358e5ee28c9e2213a8787aafbc516d2343152",
"64e0c63af9c808fd893137129867fd91939d53f2af04be4fa268006100069b2d69daa5c5d8ed7fddcb2a70eeecdf2b105dd46a1e3b7311728f639ab489326bc9",
"5e9c93158d659b2def06b0c3c7565045542662d6eee8a96a89b78ade09fe8b3dcc096d4fe48815d88d8f82620156602af541955e1f6ca30dce14e254c326b88f",
"7775dff889458dd11aef417276853e21335eb88e4dec9cfb4e9edb49820088551a2ca60339f12066101169f0dfe84b098fddb148d9da6b3d613df263889ad64b",
"f0d2805afbb91f743951351a6d024f9353a23c7ce1fc2b051b3a8b968c233f46f50f806ecb1568ffaa0b60661e334b21dde04f8fa155ac740eeb42e20b60d764",
"86a2af316e7d7754201b942e275364ac12ea8962ab5bd8d7fb276dc5fbffc8f9a28cae4e4867df6780d9b72524160927c855da5b6078e0b554aa91e31cb9ca1d",
"10bdf0caa0802705e706369baf8a3f79d72c0a03a80675a7bbb00be3a45e516424d1ee88efb56f6d5777545ae6e27765c3a8f5e493fc308915638933a1dfee55",
"b01781092b1748459e2e4ec178696627bf4ebafebba774ecf018b79a68aeb84917bf0b84bb79d17b743151144cd66b7b33a4b9e52c76c4e112050ff5385b7f0b",
"c6dbc61dec6eaeac81e3d5f755203c8e220551534a0b2fd105a91889945a638550204f44093dd998c076205dffad703a0e5cd3c7f438a7e634cd59fededb539e",
"eba51acffb4cea31db4b8d87e9bf7dd48fe97b0253ae67aa580f9ac4a9d941f2bea518ee286818cc9f633f2a3b9fb68e594b48cdd6d515bf1d52ba6c85a203a7",
"86221f3ada52037b72224f105d7999231c5e5534d03da9d9c0a12acb68460cd375daf8e24386286f9668f72326dbf99ba094392437d398e95bb8161d717f8991",
"5595e05c13a7ec4dc8f41fb70cb50a71bce17c024ff6de7af618d0cc4e9c32d9570d6d3ea45b86525491030c0d8f2b1836d5778c1ce735c17707df364d054347",
"ce0f4f6aca89590a37fe034dd74dd5fa65eb1cbd0a41508aaddc09351a3cea6d18cb2189c54b700c009f4cbf0521c7ea01be61c5ae09cb54f27bc1b44d658c82",
"7ee80b06a215a3bca970c77cda8761822bc103d44fa4b33f4d07dcb997e36d55298bceae12241b3fa07fa63be5576068da387b8d5859aeab701369848b176d42",
"940a84b6a84d109aab208c024c6ce9647676ba0aaa11f86dbb7018f9fd2220a6d901a9027f9abcf935372727cbf09ebd61a2a2eeb87653e8ecad1bab85dc8327",
"2020b78264a82d9f4151141adba8d44bf20c5ec062eee9b595a11f9e84901bf148f298e0c9f8777dcdbc7cc4670aac356cc2ad8ccb1629f16f6a76bcefbee760",
"d1b897b0e075ba68ab572adf9d9c436663e43eb3d8e62d92fc49c9be214e6f27873fe215a65170e6bea902408a25b49506f47babd07cecf7113ec10c5dd31252",
"b14d0c62abfa469a357177e594c10c194243ed2025ab8aa5ad2fa41ad318e0ff48cd5e60bec07b13634a711d2326e488a985f31e31153399e73088efc86a5c55",
"4169c5cc808d2697dc2a82430dc23e3cd356dc70a94566810502b8d655b39abf9e7f902fe717e0389219859e1945df1af6ada42e4ccda55a197b7100a30c30a1",
"258a4edb113d66c839c8b1c91f15f35ade609f11cd7f8681a4045b9fef7b0b24c82cda06a5f2067b368825e3914e53d6948ede92efd6e8387fa2e537239b5bee",
"79d2d8696d30f30fb34657761171a11e6c3f1e64cbe7bebee159cb95bfaf812b4f411e2f26d9c421dc2c284a3342d823ec293849e42d1e46b0a4ac1e3c86abaa",
"8b9436010dc5dee992ae38aea97f2cd63b946d94fedd2ec9671dcde3bd4ce9564d555c66c15bb2b900df72edb6b891ebcadfeff63c9ea4036a998be7973981e7",
"c8f68e696ed28242bf997f5b3b34959508e42d613810f1e2a435c96ed2ff560c7022f361a9234b9837feee90bf47922ee0fd5f8ddf823718d86d1e16c6090071",
"b02d3eee4860d5868b2c39ce39bfe81011290564dd678c85e8783f29302dfc1399ba95b6b53cd9ebbf400cca1db0ab67e19a325f2d115812d25d00978ad1bca4",
"7693ea73af3ac4dad21ca0d8da85b3118a7d1c6024cfaf557699868217bc0c2f44a199bc6c0edd519798ba05bd5b1b4484346a47c2cadf6bf30b785cc88b2baf",
"a0e5c1c0031c02e48b7f09a5e896ee9aef2f17fc9e18e997d7f6cac7ae316422c2b1e77984e5f3a73cb45deed5d3f84600105e6ee38f2d090c7d0442ea34c46d",
"41daa6adcfdb69f1440c37b596440165c15ada596813e2e22f060fcd551f24dee8e04ba6890387886ceec4a7a0d7fc6b44506392ec3822c0d8c1acfc7d5aebe8",
"14d4d40d5984d84c5cf7523b7798b254e275a3a8cc0a1bd06ebc0bee726856acc3cbf516ff667cda2058ad5c3412254460a82c92187041363cc77a4dc215e487",
"d0e7a1e2b9a447fee83e2277e9ff8010c2f375ae12fa7aaa8ca5a6317868a26a367a0b69fbc1cf32a55d34eb370663016f3d2110230eba754028a56f54acf57c",
"e771aa8db5a3e043e8178f39a0857ba04a3f18e4aa05743cf8d222b0b095825350ba422f63382a23d92e4149074e816a36c1cd28284d146267940b31f8818ea2",
"feb4fd6f9e87a56bef398b3284d2bda5b5b0e166583a66b61e538457ff0584872c21a32962b9928ffab58de4af2edd4e15d8b35570523207ff4e2a5aa7754caa",
"462f17bf005fb1c1b9e671779f665209ec2873e3e411f98dabf240a1d5ec3f95ce6796b6fc23fe171903b502023467dec7273ff74879b92967a2a43a5a183d33",
"d3338193b64553dbd38d144bea71c5915bb110e2d88180dbc5db364fd6171df317fc7268831b5aef75e4342b2fad8797ba39eddcef80e6ec08159350b1ad696d",
"e1590d585a3d39f7cb599abd479070966409a6846d4377acf4471d065d5db94129cc9be92573b05ed226be1e9b7cb0cabe87918589f80dadd4ef5ef25a93d28e",
"f8f3726ac5a26cc80132493a6fedcb0e60760c09cfc84cad178175986819665e76842d7b9fedf76dddebf5d3f56faaad4477587af21606d396ae570d8e719af2",
"30186055c07949948183c850e9a756cc09937e247d9d928e869e20bafc3cd9721719d34e04a0899b92c736084550186886efba2e790d8be6ebf040b209c439a4",
"f3c4276cb863637712c241c444c5cc1e3554e0fddb174d035819dd83eb700b4ce88df3ab3841ba02085e1a99b4e17310c5341075c0458ba376c95a6818fbb3e2",
"0aa007c4dd9d5832393040a1583c930bca7dc5e77ea53add7e2b3f7c8e231368043520d4a3ef53c969b6bbfd025946f632bd7f765d53c21003b8f983f75e2a6a",
"08e9464720533b23a04ec24f7ae8c103145f765387d738777d3d343477fd1c58db052142cab754ea674378e18766c53542f71970171cc4f81694246b717d7564",
"d37ff7ad297993e7ec21e0f1b4b5ae719cdc83c5db687527f27516cbffa822888a6810ee5c1ca7bfe3321119be1ab7bfa0a502671c8329494df7ad6f522d440f",
"dd9042f6e464dcf86b1262f6accfafbd8cfd902ed3ed89abf78ffa482dbdeeb6969842394c9a1168ae3d481a017842f660002d42447c6b22f7b72f21aae021c9",
"bd965bf31e87d70327536f2a341cebc4768eca275fa05ef98f7f1b71a0351298de006fba73fe6733ed01d75801b4a928e54231b38e38c562b2e33ea1284992fa",
"65676d800617972fbd87e4b9514e1c67402b7a331096d3bfac22f1abb95374abc942f16e9ab0ead33b87c91968a6e509e119ff07787b3ef483e1dcdccf6e3022",
"939fa189699c5d2c81ddd1ffc1fa207c970b6a3685bb29ce1d3e99d42f2f7442da53e95a72907314f4588399a3ff5b0a92beb3f6be2694f9f86ecf2952d5b41c",
"c516541701863f91005f314108ceece3c643e04fc8c42fd2ff556220e616aaa6a48aeb97a84bad74782e8dff96a1a2fa949339d722edcaa32b57067041df88cc",
"987fd6e0d6857c553eaebb3d34970a2c2f6e89a3548f492521722b80a1c21a153892346d2cba6444212d56da9a26e324dccbc0dcde85d4d2ee4399eec5a64e8f",
"ae56deb1c2328d9c4017706bce6e99d41349053ba9d336d677c4c27d9fd50ae6aee17e853154e1f4fe7672346da2eaa31eea53fcf24a22804f11d03da6abfc2b",
"49d6a608c9bde4491870498572ac31aac3fa40938b38a7818f72383eb040ad39532bc06571e13d767e6945ab77c0bdc3b0284253343f9f6c1244ebf2ff0df866",
"da582ad8c5370b4469af862aa6467a2293b2b28bd80ae0e91f425ad3d47249fdf98825cc86f14028c3308c9804c78bfeeeee461444ce243687e1a50522456a1d",
"d5266aa3331194aef852eed86d7b5b2633a0af1c735906f2e13279f14931a9fc3b0eac5ce9245273bd1aa92905abe16278ef7efd47694789a7283b77da3c70f8",
"2962734c28252186a9a1111c732ad4de4506d4b4480916303eb7991d659ccda07a9911914bc75c418ab7a4541757ad054796e26797feaf36e9f6ad43f14b35a4",
"e8b79ec5d06e111bdfafd71e9f5760f00ac8ac5d8bf768f9ff6f08b8f026096b1cc3a4c973333019f1e3553e77da3f98cb9f542e0a90e5f8a940cc58e59844b3",
"dfb320c44f9d41d1efdcc015f08dd5539e526e39c87d509ae6812a969e5431bf4fa7d91ffd03b981e0d544cf72d7b1c0374f8801482e6dea2ef903877eba675e",
"d88675118fdb55a5fb365ac2af1d217bf526ce1ee9c94b2f0090b2c58a06ca58187d7fe57c7bed9d26fca067b4110eefcd9a0a345de872abe20de368001b0745",
"b893f2fc41f7b0dd6e2f6aa2e0370c0cff7df09e3acfcc0e920b6e6fad0ef747c40668417d342b80d2351e8c175f20897a062e9765e6c67b539b6ba8b9170545",
"6c67ec5697accd235c59b486d7b70baeedcbd4aa64ebd4eef3c7eac189561a726250aec4d48cadcafbbe2ce3c16ce2d691a8cce06e8879556d4483ed7165c063",
"f1aa2b044f8f0c638a3f362e677b5d891d6fd2ab0765f6ee1e4987de057ead357883d9b405b9d609eea1b869d97fb16d9b51017c553f3b93c0a1e0f1296fedcd",
"cbaa259572d4aebfc1917acddc582b9f8dfaa928a198ca7acd0f2aa76a134a90252e6298a65b08186a350d5b7626699f8cb721a3ea5921b753ae3a2dce24ba3a",
"fa1549c9796cd4d303dcf452c1fbd5744fd9b9b47003d920b92de34839d07ef2a29ded68f6fc9e6c45e071a2e48bd50c5084e96b657dd0404045a1ddefe282ed",
"5cf2ac897ab444dcb5c8d87c495dbdb34e1838b6b629427caa51702ad0f9688525f13bec503a3c3a2c80a65e0b5715e8afab00ffa56ec455a49a1ad30aa24fcd",
"9aaf80207bace17bb7ab145757d5696bde32406ef22b44292ef65d4519c3bb2ad41a59b62cc3e94b6fa96d32a7faadae28af7d35097219aa3fd8cda31e40c275",
"af88b163402c86745cb650c2988fb95211b94b03ef290eed9662034241fd51cf398f8073e369354c43eae1052f9b63b08191caa138aa54fea889cc7024236897",
"48fa7d64e1ceee27b9864db5ada4b53d00c9bc7626555813d3cd6730ab3cc06ff342d727905e33171bde6e8476e77fb1720861e94b73a2c538d254746285f430",
"0e6fd97a85e904f87bfe85bbeb34f69e1f18105cf4ed4f87aec36c6e8b5f68bd2a6f3dc8a9ecb2b61db4eedb6b2ea10bf9cb0251fb0f8b344abf7f366b6de5ab",
"06622da5787176287fdc8fed440bad187d830099c94e6d04c8e9c954cda70c8bb9e1fc4a6d0baa831b9b78ef6648681a4867a11da93ee36e5e6a37d87fc63f6f",
"1da6772b58fabf9c61f68d412c82f182c0236d7d575ef0b58dd22458d643cd1dfc93b03871c316d8430d312995d4197f0874c99172ba004a01ee295abac24e46",
"3cd2d9320b7b1d5fb9aab951a76023fa667be14a9124e394513918a3f44096ae4904ba0ffc150b63bc7ab1eeb9a6e257e5c8f000a70394a5afd842715de15f29",
"04cdc14f7434e0b4be70cb41db4c779a88eaef6accebcb41f2d42fffe7f32a8e281b5c103a27021d0d08362250753cdf70292195a53a48728ceb5844c2d98bab",
"9071b7a8a075d0095b8fb3ae5113785735ab98e2b52faf91d5b89e44aac5b5d4ebbf91223b0ff4c71905da55342e64655d6ef8c89a4768c3f93a6dc0366b5bc8",
"ebb30240dd96c7bc8d0abe49aa4edcbb4afdc51ff9aaf720d3f9e7fbb0f9c6d6571350501769fc4ebd0b2141247ff400d4fd4be414edf37757bb90a32ac5c65a",
"8532c58bf3c8015d9d1cbe00eef1f5082f8f3632fbe9f1ed4f9dfb1fa79e8283066d77c44c4af943d76b300364aecbd0648c8a8939bd204123f4b56260422dec",
"fe9846d64f7c7708696f840e2d76cb4408b6595c2f81ec6a28a7f2f20cb88cfe6ac0b9e9b8244f08bd7095c350c1d0842f64fb01bb7f532dfcd47371b0aeeb79",
"28f17ea6fb6c42092dc264257e29746321fb5bdaea9873c2a7fa9d8f53818e899e161bc77dfe8090afd82bf2266c5c1bc930a8d1547624439e662ef695f26f24",
"ec6b7d7f030d4850acae3cb615c21dd25206d63e84d1db8d957370737ba0e98467ea0ce274c66199901eaec18a08525715f53bfdb0aacb613d342ebdceeddc3b",
"b403d3691c03b0d3418df327d5860d34bbfcc4519bfbce36bf33b208385fadb9186bc78a76c489d89fd57e7dc75412d23bcd1dae8470ce9274754bb8585b13c5",
"31fc79738b8772b3f55cd8178813b3b52d0db5a419d30ba9495c4b9da0219fac6df8e7c23a811551a62b827f256ecdb8124ac8a6792ccfecc3b3012722e94463",
"bb2039ec287091bcc9642fc90049e73732e02e577e2862b32216ae9bedcd730c4c284ef3968c368b7d37584f97bd4b4dc6ef6127acfe2e6ae2509124e66c8af4",
"f53d68d13f45edfcb9bd415e2831e938350d5380d3432278fc1c0c381fcb7c65c82dafe051d8c8b0d44e0974a0e59ec7bf7ed0459f86e96f329fc79752510fd3",
"8d568c7984f0ecdf7640fbc483b5d8c9f86634f6f43291841b309a350ab9c1137d24066b09da9944bac54d5bb6580d836047aac74ab724b887ebf93d4b32eca9",
"c0b65ce5a96ff774c456cac3b5f2c4cd359b4ff53ef93a3da0778be4900d1e8da1601e769e8f1b02d2a2f8c5b9fa10b44f1c186985468feeb008730283a6657d",
"4900bba6f5fb103ece8ec96ada13a5c3c85488e05551da6b6b33d988e611ec0fe2e3c2aa48ea6ae8986a3a231b223c5d27cec2eadde91ce07981ee652862d1e4",
"c7f5c37c7285f927f76443414d4357ff789647d7a005a5a787e03c346b57f49f21b64fa9cf4b7e45573e23049017567121a9c3d4b2b73ec5e9413577525db45a",
"ec7096330736fdb2d64b5653e7475da746c23a4613a82687a28062d3236364284ac01720ffb406cfe265c0df626a188c9e5963ace5d3d5bb363e32c38c2190a6",
"82e744c75f4649ec52b80771a77d475a3bc091989556960e276a5f9ead92a03f718742cdcfeaee5cb85c44af198adc43a4a428f5f0c2ddb0be36059f06d7df73",
"2834b7a7170f1f5b68559ab78c1050ec21c919740b784a9072f6e5d69f828d70c919c5039fb148e39e2c8a52118378b064ca8d5001cd10a5478387b966715ed6",
"16b4ada883f72f853bb7ef253efcab0c3e2161687ad61543a0d2824f91c1f81347d86be709b16996e17f2dd486927b0288ad38d13063c4a9672c39397d3789b6",
"78d048f3a69d8b54ae0ed63a573ae350d89f7c6cf1f3688930de899afa037697629b314e5cd303aa62feea72a25bf42b304b6c6bcb27fae21c16d925e1fbdac3",
"0f746a48749287ada77a82961f05a4da4abdb7d77b1220f836d09ec814359c0ec0239b8c7b9ff9e02f569d1b301ef67c4612d1de4f730f81c12c40cc063c5caa",
"f0fc859d3bd195fbdc2d591e4cdac15179ec0f1dc821c11df1f0c1d26e6260aaa65b79fafacafd7d3ad61e600f250905f5878c87452897647a35b995bcadc3a3",
"2620f687e8625f6a412460b42e2cef67634208ce10a0cbd4dff7044a41b7880077e9f8dc3b8d1216d3376a21e015b58fb279b521d83f9388c7382c8505590b9b",
"227e3aed8d2cb10b918fcb04f9de3e6d0a57e08476d93759cd7b2ed54a1cbf0239c528fb04bbf288253e601d3bc38b21794afef90b17094a182cac557745e75f",
"1a929901b09c25f27d6b35be7b2f1c4745131fdebca7f3e2451926720434e0db6e74fd693ad29b777dc3355c592a361c4873b01133a57c2e3b7075cbdb86f4fc",
"5fd7968bc2fe34f220b5e3dc5af9571742d73b7d60819f2888b629072b96a9d8ab2d91b82d0a9aaba61bbd39958132fcc4257023d1eca591b3054e2dc81c8200",
"dfcce8cf32870cc6a503eadafc87fd6f78918b9b4d0737db6810be996b5497e7e5cc80e312f61e71ff3e9624436073156403f735f56b0b01845c18f6caf772e6",
"02f7ef3a9ce0fff960f67032b296efca3061f4934d690749f2d01c35c81c14f39a67fa350bc8a0359bf1724bffc3bca6d7c7bba4791fd522a3ad353c02ec5aa8",
"64be5c6aba65d594844ae78bb022e5bebe127fd6b6ffa5a13703855ab63b624dcd1a363f99203f632ec386f3ea767fc992e8ed9686586aa27555a8599d5b808f",
"f78585505c4eaa54a8b5be70a61e735e0ff97af944ddb3001e35d86c4e2199d976104b6ae31750a36a726ed285064f5981b503889fef822fcdc2898dddb7889a",
"e4b5566033869572edfd87479a5bb73c80e8759b91232879d96b1dda36c012076ee5a2ed7ae2de63ef8406a06aea82c188031b560beafb583fb3de9e57952a7e",
"e1b3e7ed867f6c9484a2a97f7715f25e25294e992e41f6a7c161ffc2adc6daaeb7113102d5e6090287fe6ad94ce5d6b739c6ca240b05c76fb73f25dd024bf935",
"85fd085fdc12a080983df07bd7012b0d402a0f4043fcb2775adf0bad174f9b08d1676e476985785c0a5dcc41dbff6d95ef4d66a3fbdc4a74b82ba52da0512b74",
"aed8fa764b0fbff821e05233d2f7b0900ec44d826f95e93c343c1bc3ba5a24374b1d616e7e7aba453a0ada5e4fab5382409e0d42ce9c2bc7fb39a99c340c20f0",
"7ba3b2e297233522eeb343bd3ebcfd835a04007735e87f0ca300cbee6d416565162171581e4020ff4cf176450f1291ea2285cb9ebffe4c56660627685145051c",
"de748bcf89ec88084721e16b85f30adb1a6134d664b5843569babc5bbd1a15ca9b61803c901a4fef32965a1749c9f3a4e243e173939dc5a8dc495c671ab52145",
"aaf4d2bdf200a919706d9842dce16c98140d34bc433df320aba9bd429e549aa7a3397652a4d768277786cf993cde2338673ed2e6b66c961fefb82cd20c93338f",
"c408218968b788bf864f0997e6bc4c3dba68b276e2125a4843296052ff93bf5767b8cdce7131f0876430c1165fec6c4f47adaa4fd8bcfacef463b5d3d0fa61a0",
"76d2d819c92bce55fa8e092ab1bf9b9eab237a25267986cacf2b8ee14d214d730dc9a5aa2d7b596e86a1fd8fa0804c77402d2fcd45083688b218b1cdfa0dcbcb",
"72065ee4dd91c2d8509fa1fc28a37c7fc9fa7d5b3f8ad3d0d7a25626b57b1b44788d4caf806290425f9890a3a2a35a905ab4b37acfd0da6e4517b2525c9651e4",
"64475dfe7600d7171bea0b394e27c9b00d8e74dd1e416a79473682ad3dfdbb706631558055cfc8a40e07bd015a4540dcdea15883cbbf31412df1de1cd4152b91",
"12cd1674a4488a5d7c2b3160d2e2c4b58371bedad793418d6f19c6ee385d70b3e06739369d4df910edb0b0a54cbff43d54544cd37ab3a06cfa0a3ddac8b66c89",
"60756966479dedc6dd4bcff8ea7d1d4ce4d4af2e7b097e32e3763518441147cc12b3c0ee6d2ecabf1198cec92e86a3616fba4f4e872f5825330adbb4c1dee444",
"a7803bcb71bc1d0f4383dde1e0612e04f872b715ad30815c2249cf34abb8b024915cb2fc9f4e7cc4c8cfd45be2d5a91eab0941c7d270e2da4ca4a9f7ac68663a",
"b84ef6a7229a34a750d9a98ee2529871816b87fbe3bc45b45fa5ae82d5141540211165c3c5d7a7476ba5a4aa06d66476f0d9dc49a3f1ee72c3acabd498967414",
"fae4b6d8efc3f8c8e64d001dabec3a21f544e82714745251b2b4b393f2f43e0da3d403c64db95a2cb6e23ebb7b9e94cdd5ddac54f07c4a61bd3cb10aa6f93b49",
"34f7286605a122369540141ded79b8957255da2d4155abbf5a8dbb89c8eb7ede8eeef1daa46dc29d751d045dc3b1d658bb64b80ff8589eddb3824b13da235a6b",
"3b3b48434be27b9eababba43bf6b35f14b30f6a88dc2e750c358470d6b3aa3c18e47db4017fa55106d8252f016371a00f5f8b070b74ba5f23cffc5511c9f09f0",
"ba289ebd6562c48c3e10a8ad6ce02e73433d1e93d7c9279d4d60a7e879ee11f441a000f48ed9f7c4ed87a45136d7dccdca482109c78a51062b3ba4044ada2469",
"022939e2386c5a37049856c850a2bb10a13dfea4212b4c732a8840a9ffa5faf54875c5448816b2785a007da8a8d2bc7d71a54e4e6571f10b600cbdb25d13ede3",
"e6fec19d89ce8717b1a087024670fe026f6c7cbda11caef959bb2d351bf856f8055d1c0ebdaaa9d1b17886fc2c562b5e99642fc064710c0d3488a02b5ed7f6fd",
"94c96f02a8f576aca32ba61c2b206f907285d9299b83ac175c209a8d43d53bfe683dd1d83e7549cb906c28f59ab7c46f8751366a28c39dd5fe2693c9019666c8",
"31a0cd215ebd2cb61de5b9edc91e6195e31c59a5648d5c9f737e125b2605708f2e325ab3381c8dce1a3e958886f1ecdc60318f882cfe20a24191352e617b0f21",
"91ab504a522dce78779f4c6c6ba2e6b6db5565c76d3e7e7c920caf7f757ef9db7c8fcf10e57f03379ea9bf75eb59895d96e149800b6aae01db778bb90afbc989",
"d85cabc6bd5b1a01a5afd8c6734740da9fd1c1acc6db29bfc8a2e5b668b028b6b3154bfb8703fa3180251d589ad38040ceb707c4bad1b5343cb426b61eaa49c1",
"d62efbec2ca9c1f8bd66ce8b3f6a898cb3f7566ba6568c618ad1feb2b65b76c3ce1dd20f7395372faf28427f61c9278049cf0140df434f5633048c86b81e0399",
"7c8fdc6175439e2c3db15bafa7fb06143a6a23bc90f449e79deef73c3d492a671715c193b6fea9f036050b946069856b897e08c00768f5ee5ddcf70b7cd6d0e0",
"58602ee7468e6bc9df21bd51b23c005f72d6cb013f0a1b48cbec5eca299299f97f09f54a9a01483eaeb315a6478bad37ba47ca1347c7c8fc9e6695592c91d723",
"27f5b79ed256b050993d793496edf4807c1d85a7b0a67c9c4fa99860750b0ae66989670a8ffd7856d7ce411599e58c4d77b232a62bef64d15275be46a68235ff",
"3957a976b9f1887bf004a8dca942c92d2b37ea52600f25e0c9bc5707d0279c00c6e85a839b0d2d8eb59c51d94788ebe62474a791cadf52cccf20f5070b6573fc",
"eaa2376d55380bf772ecca9cb0aa4668c95c707162fa86d518c8ce0ca9bf7362b9f2a0adc3ff59922df921b94567e81e452f6c1a07fc817cebe99604b3505d38",
"c1e2c78b6b2734e2480ec550434cb5d613111adcc21d475545c3b1b7e6ff12444476e5c055132e2229dc0f807044bb919b1a5662dd38a9ee65e243a3911aed1a",
"8ab48713389dd0fcf9f965d3ce66b1e559a1f8c58741d67683cd971354f452e62d0207a65e436c5d5d8f8ee71c6abfe50e669004c302b31a7ea8311d4a916051",
"24ce0addaa4c65038bd1b1c0f1452a0b128777aabc94a29df2fd6c7e2f85f8ab9ac7eff516b0e0a825c84a24cfe492eaad0a6308e46dd42fe8333ab971bb30ca",
"5154f929ee03045b6b0c0004fa778edee1d139893267cc84825ad7b36c63de32798e4a166d24686561354f63b00709a1364b3c241de3febf0754045897467cd4",
"e74e907920fd87bd5ad636dd11085e50ee70459c443e1ce5809af2bc2eba39f9e6d7128e0e3712c316da06f4705d78a4838e28121d4344a2c79c5e0db307a677",
"bf91a22334bac20f3fd80663b3cd06c4e8802f30e6b59f90d3035cc9798a217ed5a31abbda7fa6842827bdf2a7a1c21f6fcfccbb54c6c52926f32da816269be1",
"d9d5c74be5121b0bd742f26bffb8c89f89171f3f934913492b0903c271bbe2b3395ef259669bef43b57f7fcc3027db01823f6baee66e4f9fead4d6726c741fce",
"50c8b8cf34cd879f80e2faab3230b0c0e1cc3e9dcadeb1b9d97ab923415dd9a1fe38addd5c11756c67990b256e95ad6d8f9fedce10bf1c90679cde0ecf1be347",
"0a386e7cd5dd9b77a035e09fe6fee2c8ce61b5383c87ea43205059c5e4cd4f4408319bb0a82360f6a58e6c9ce3f487c446063bf813bc6ba535e17fc1826cfc91",
"1f1459cb6b61cbac5f0efe8fc487538f42548987fcd56221cfa7beb22504769e792c45adfb1d6b3d60d7b749c8a75b0bdf14e8ea721b95dca538ca6e25711209",
"e58b3836b7d8fedbb50ca5725c6571e74c0785e97821dab8b6298c10e4c079d4a6cdf22f0fedb55032925c16748115f01a105e77e00cee3d07924dc0d8f90659",
"b929cc6505f020158672deda56d0db081a2ee34c00c1100029bdf8ea98034fa4bf3e8655ec697fe36f40553c5bb46801644a627d3342f4fc92b61f03290fb381",
"72d353994b49d3e03153929a1e4d4f188ee58ab9e72ee8e512f29bc773913819ce057ddd7002c0433ee0a16114e3d156dd2c4a7e80ee53378b8670f23e33ef56",
"c70ef9bfd775d408176737a0736d68517ce1aaad7e81a93c8c1ed967ea214f56c8a377b1763e676615b60f3988241eae6eab9685a5124929d28188f29eab06f7",
"c230f0802679cb33822ef8b3b21bf7a9a28942092901d7dac3760300831026cf354c9232df3e084d9903130c601f63c1f4a4a4b8106e468cd443bbe5a734f45f",
"6f43094cafb5ebf1f7a4937ec50f56a4c9da303cbb55ac1f27f1f1976cd96beda9464f0e7b9c54620b8a9fba983164b8be3578425a024f5fe199c36356b88972",
"3745273f4c38225db2337381871a0c6aafd3af9b018c88aa02025850a5dc3a42a1a3e03e56cbf1b0876d63a441f1d2856a39b8801eb5af325201c415d65e97fe",
"c50c44cca3ec3edaae779a7e179450ebdda2f97067c690aa6c5a4ac7c30139bb27c0df4db3220e63cb110d64f37ffe078db72653e2daacf93ae3f0a2d1a7eb2e",
"8aef263e385cbc61e19b28914243262af5afe8726af3ce39a79c27028cf3ecd3f8d2dfd9cfc9ad91b58f6f20778fd5f02894a3d91c7d57d1e4b866a7f364b6be",
"28696141de6e2d9bcb3235578a66166c1448d3e905a1b482d423be4bc5369bc8c74dae0acc9cc123e1d8ddce9f97917e8c019c552da32d39d2219b9abf0fa8c8",
"2fb9eb2085830181903a9dafe3db428ee15be7662224efd643371fb25646aee716e531eca69b2bdc8233f1a8081fa43da1500302975a77f42fa592136710e9dc",
"66f9a7143f7a3314a669bf2e24bbb35014261d639f495b6c9c1f104fe8e320aca60d4550d69d52edbd5a3cdeb4014ae65b1d87aa770b69ae5c15f4330b0b0ad8",
"f4c4dd1d594c3565e3e25ca43dad82f62abea4835ed4cd811bcd975e46279828d44d4c62c3679f1b7f7b9dd4571d7b49557347b8c5460cbdc1bef690fb2a08c0",
"8f1dc9649c3a84551f8f6e91cac68242a43b1f8f328ee92280257387fa7559aa6db12e4aeadc2d26099178749c6864b357f3f83b2fb3efa8d2a8db056bed6bcc",
"3139c1a7f97afd1675d460ebbc07f2728aa150df849624511ee04b743ba0a833092f18c12dc91b4dd243f333402f59fe28abdbbbae301e7b659c7a26d5c0f979",
"06f94a2996158a819fe34c40de3cf0379fd9fb85b3e363ba3926a0e7d960e3f4c2e0c70c7ce0ccb2a64fc29869f6e7ab12bd4d3f14fce943279027e785fb5c29",
"c29c399ef3eee8961e87565c1ce263925fc3d0ce267d13e48dd9e732ee67b0f69fad56401b0f10fcaac119201046cca28c5b14abdea3212ae65562f7f138db3d",
"4cec4c9df52eef05c3f6faaa9791bc7445937183224ecc37a1e58d0132d35617531d7e795f52af7b1eb9d147de1292d345fe341823f8e6bc1e5badca5c656108",
"898bfbae93b3e18d00697eab7d9704fa36ec339d076131cefdf30edbe8d9cc81c3a80b129659b163a323bab9793d4feed92d54dae966c77529764a09be88db45",
"ee9bd0469d3aaf4f14035be48a2c3b84d9b4b1fff1d945e1f1c1d38980a951be197b25fe22c731f20aeacc930ba9c4a1f4762227617ad350fdabb4e80273a0f4",
"3d4d3113300581cd96acbf091c3d0f3c310138cd6979e6026cde623e2dd1b24d4a8638bed1073344783ad0649cc6305ccec04beb49f31c633088a99b65130267",
"95c0591ad91f921ac7be6d9ce37e0663ed8011c1cfd6d0162a5572e94368bac02024485e6a39854aa46fe38e97d6c6b1947cd272d86b06bb5b2f78b9b68d559d",
"227b79ded368153bf46c0a3ca978bfdbef31f3024a5665842468490b0ff748ae04e7832ed4c9f49de9b1706709d623e5c8c15e3caecae8d5e433430ff72f20eb",
"5d34f3952f0105eef88ae8b64c6ce95ebfade0e02c69b08762a8712d2e4911ad3f941fc4034dc9b2e479fdbcd279b902faf5d838bb2e0c6495d372b5b7029813",
"7f939bf8353abce49e77f14f3750af20b7b03902e1a1e7fb6aaf76d0259cd401a83190f15640e74f3e6c5a90e839c7821f6474757f75c7bf9002084ddc7a62dc",
"062b61a2f9a33a71d7d0a06119644c70b0716a504de7e5e1be49bd7b86e7ed6817714f9f0fc313d06129597e9a2235ec8521de36f7290a90ccfc1ffa6d0aee29",
"f29e01eeae64311eb7f1c6422f946bf7bea36379523e7b2bbaba7d1d34a22d5ea5f1c5a09d5ce1fe682cced9a4798d1a05b46cd72dff5c1b355440b2a2d476bc",
"ec38cd3bbab3ef35d7cb6d5c914298351d8a9dc97fcee051a8a02f58e3ed6184d0b7810a5615411ab1b95209c3c810114fdeb22452084e77f3f847c6dbaafe16",
"c2aef5e0ca43e82641565b8cb943aa8ba53550caef793b6532fafad94b816082f0113a3ea2f63608ab40437ecc0f0229cb8fa224dcf1c478a67d9b64162b92d1",
"15f534efff7105cd1c254d074e27d5898b89313b7d366dc2d7d87113fa7d53aae13f6dba487ad8103d5e854c91fdb6e1e74b2ef6d1431769c30767dde067a35c",
"89acbca0b169897a0a2714c2df8c95b5b79cb69390142b7d6018bb3e3076b099b79a964152a9d912b1b86412b7e372e9cecad7f25d4cbab8a317be36492a67d7",
"e3c0739190ed849c9c962fd9dbb55e207e624fcac1eb417691515499eea8d8267b7e8f1287a63633af5011fde8c4ddf55bfdf722edf88831414f2cfaed59cb9a",
"8d6cf87c08380d2d1506eee46fd4222d21d8c04e585fbfd08269c98f702833a156326a0724656400ee09351d57b440175e2a5de93cc5f80db6daf83576cf75fa",
"da24bede383666d563eeed37f6319baf20d5c75d1635a6ba5ef4cfa1ac95487e96f8c08af600aab87c986ebad49fc70a58b4890b9c876e091016daf49e1d322e",
"f9d1d1b1e87ea7ae753a029750cc1cf3d0157d41805e245c5617bb934e732f0ae3180b78e05bfe76c7c3051e3e3ac78b9b50c05142657e1e03215d6ec7bfd0fc",
"11b7bc1668032048aa43343de476395e814bbbc223678db951a1b03a021efac948cfbe215f97fe9a72a2f6bc039e3956bfa417c1a9f10d6d7ba5d3d32ff323e5",
"b8d9000e4fc2b066edb91afee8e7eb0f24e3a201db8b6793c0608581e628ed0bcc4e5aa6787992a4bcc44e288093e63ee83abd0bc3ec6d0934a674a4da13838a",
"ce325e294f9b6719d6b61278276ae06a2564c03bb0b783fafe785bdf89c7d5acd83e78756d301b445699024eaeb77b54d477336ec2a4f332f2b3f88765ddb0c3",
"29acc30e9603ae2fccf90bf97e6cc463ebe28c1b2f9b4b765e70537c25c702a29dcbfbf14c99c54345ba2b51f17b77b5f15db92bbad8fa95c471f5d070a137cc",
"3379cbaae562a87b4c0425550ffdd6bfe1203f0d666cc7ea095be407a5dfe61ee91441cd5154b3e53b4f5fb31ad4c7a9ad5c7af4ae679aa51a54003a54ca6b2d",
"3095a349d245708c7cf550118703d7302c27b60af5d4e67fc978f8a4e60953c7a04f92fcf41aee64321ccb707a895851552b1e37b00bc5e6b72fa5bcef9e3fff",
"07262d738b09321f4dbccec4bb26f48cb0f0ed246ce0b31b9a6e7bc683049f1f3e5545f28ce932dd985c5ab0f43bd6de0770560af329065ed2e49d34624c2cbb",
"b6405eca8ee3316c87061cc6ec18dba53e6c250c63ba1f3bae9e55dd3498036af08cd272aa24d713c6020d77ab2f3919af1a32f307420618ab97e73953994fb4",
"7ee682f63148ee45f6e5315da81e5c6e557c2c34641fc509c7a5701088c38a74756168e2cd8d351e88fd1a451f360a01f5b2580f9b5a2e8cfc138f3dd59a3ffc",
"1d263c179d6b268f6fa016f3a4f29e943891125ed8593c81256059f5a7b44af2dcb2030d175c00e62ecaf7ee96682aa07ab20a611024a28532b1c25b86657902",
"106d132cbdb4cd2597812846e2bc1bf732fec5f0a5f65dbb39ec4e6dc64ab2ce6d24630d0f15a805c3540025d84afa98e36703c3dbee713e72dde8465bc1be7e",
"0e79968226650667a8d862ea8da4891af56a4e3a8b6d1750e394f0dea76d640d85077bcec2cc86886e506751b4f6a5838f7f0b5fef765d9dc90dcdcbaf079f08",
"521156a82ab0c4e566e5844d5e31ad9aaf144bbd5a464fdca34dbd5717e8ff711d3ffebbfa085d67fe996a34f6d3e4e60b1396bf4b1610c263bdbb834d560816",
"1aba88befc55bc25efbce02db8b9933e46f57661baeabeb21cc2574d2a518a3cba5dc5a38e49713440b25f9c744e75f6b85c9d8f4681f676160f6105357b8406",
"5a9949fcb2c473cda968ac1b5d08566dc2d816d960f57e63b898fa701cf8ebd3f59b124d95bfbbedc5f1cf0e17d5eaed0c02c50b69d8a402cabcca4433b51fd4",
"b0cead09807c672af2eb2b0f06dde46cf5370e15a4096b1a7d7cbb36ec31c205fbefca00b7a4162fa89fb4fb3eb78d79770c23f44e7206664ce3cd931c291e5d",
"bb6664931ec97044e45b2ae420ae1c551a8874bc937d08e969399c3964ebdba8346cdd5d09caafe4c28ba7ec788191ceca65ddd6f95f18583e040d0f30d0364d",
"65bc770a5faa3792369803683e844b0be7ee96f29f6d6a35568006bd5590f9a4ef639b7a8061c7b0424b66b60ac34af3119905f33a9d8c3ae18382ca9b689900",
"ea9b4dca333336aaf839a45c6eaa48b8cb4c7ddabffea4f643d6357ea6628a480a5b45f2b052c1b07d1fedca918b6f1139d80f74c24510dcbaa4be70eacc1b06",
"e6342fb4a780ad975d0e24bce149989b91d360557e87994f6b457b895575cc02d0c15bad3ce7577f4c63927ff13f3e381ff7e72bdbe745324844a9d27e3f1c01",
"3e209c9b33e8e461178ab46b1c64b49a07fb745f1c8bc95fbfb94c6b87c69516651b264ef980937fad41238b91ddc011a5dd777c7efd4494b4b6ecd3a9c22ac0",
"fd6a3d5b1875d80486d6e69694a56dbb04a99a4d051f15db2689776ba1c4882e6d462a603b7015dc9f4b7450f05394303b8652cfb404a266962c41bae6e18a94",
"951e27517e6bad9e4195fc8671dee3e7e9be69cee1422cb9fecfce0dba875f7b310b93ee3a3d558f941f635f668ff832d2c1d033c5e2f0997e4c66f147344e02",
"8eba2f874f1ae84041903c7c4253c82292530fc8509550bfdc34c95c7e2889d5650b0ad8cb988e5c4894cb87fbfbb19612ea93ccc4c5cad17158b9763464b492",
"16f712eaa1b7c6354719a8e7dbdfaf55e4063a4d277d947550019b38dfb564830911057d50506136e2394c3b28945cc964967d54e3000c2181626cfb9b73efd2",
"c39639e7d5c7fb8cdd0fd3e6a52096039437122f21c78f1679cea9d78a734c56ecbeb28654b4f18e342c331f6f7229ec4b4bc281b2d80a6eb50043f31796c88c",
"72d081af99f8a173dcc9a0ac4eb3557405639a29084b54a40172912a2f8a395129d5536f0918e902f9e8fa6000995f4168ddc5f893011be6a0dbc9b8a1a3f5bb",
"c11aa81e5efd24d5fc27ee586cfd8847fbb0e27601ccece5ecca0198e3c7765393bb74457c7e7a27eb9170350e1fb53857177506be3e762cc0f14d8c3afe9077",
"c28f2150b452e6c0c424bcde6f8d72007f9310fed7f2f87de0dbb64f4479d6c1441ba66f44b2accee61609177ed340128b407ecec7c64bbe50d63d22d8627727",
"f63d88122877ec30b8c8b00d22e89000a966426112bd44166e2f525b769ccbe9b286d437a0129130dde1a86c43e04bedb594e671d98283afe64ce331de9828fd",
"348b0532880b88a6614a8d7408c3f913357fbb60e995c60205be9139e74998aede7f4581e42f6b52698f7fa1219708c14498067fd1e09502de83a77dd281150c",
"5133dc8bef725359dff59792d85eaf75b7e1dcd1978b01c35b1b85fcebc63388ad99a17b6346a217dc1a9622ebd122ecf6913c4d31a6b52a695b86af00d741a0",
"2753c4c0e98ecad806e88780ec27fccd0f5c1ab547f9e4bf1659d192c23aa2cc971b58b6802580baef8adc3b776ef7086b2545c2987f348ee3719cdef258c403",
"b1663573ce4b9d8caefc865012f3e39714b9898a5da6ce17c25a6a47931a9ddb9bbe98adaa553beed436e89578455416c2a52a525cf2862b8d1d49a2531b7391",
"64f58bd6bfc856f5e873b2a2956ea0eda0d6db0da39c8c7fc67c9f9feefcff3072cdf9e6ea37f69a44f0c61aa0da3693c2db5b54960c0281a088151db42b11e8",
"0764c7be28125d9065c4b98a69d60aede703547c66a12e17e1c618994132f5ef82482c1e3fe3146cc65376cc109f0138ed9a80e49f1f3c7d610d2f2432f20605",
"f748784398a2ff03ebeb07e155e66116a839741a336e32da71ec696001f0ad1b25cd48c69cfca7265eca1dd71904a0ce748ac4124f3571076dfa7116a9cf00e9",
"3f0dbc0186bceb6b785ba78d2a2a013c910be157bdaffae81bb6663b1a73722f7f1228795f3ecada87cf6ef0078474af73f31eca0cc200ed975b6893f761cb6d",
"d4762cd4599876ca75b2b8fe249944dbd27ace741fdab93616cbc6e425460feb51d4e7adcc38180e7fc47c89024a7f56191adb878dfde4ead62223f5a2610efe",
"cd36b3d5b4c91b90fcbba79513cfee1907d8645a162afd0cd4cf4192d4a5f4c892183a8eacdb2b6b6a9d9aa8c11ac1b261b380dbee24ca468f1bfd043c58eefe",
"98593452281661a53c48a9d8cd790826c1a1ce567738053d0bee4a91a3d5bd92eefdbabebe3204f2031ca5f781bda99ef5d8ae56e5b04a9e1ecd21b0eb05d3e1",
"771f57dd2775ccdab55921d3e8e30ccf484d61fe1c1b9c2ae819d0fb2a12fab9be70c4a7a138da84e8280435daade5bbe66af0836a154f817fb17f3397e725a3",
"c60897c6f828e21f16fbb5f15b323f87b6c8955eabf1d38061f707f608abdd993fac3070633e286cf8339ce295dd352df4b4b40b2f29da1dd50b3a05d079e6bb",
"8210cd2c2d3b135c2cf07fa0d1433cd771f325d075c6469d9c7f1ba0943cd4ab09808cabf4acb9ce5bb88b498929b4b847f681ad2c490d042db2aec94214b06b",
"1d4edfffd8fd80f7e4107840fa3aa31e32598491e4af7013c197a65b7f36dd3ac4b478456111cd4309d9243510782fa31b7c4c95fa951520d020eb7e5c36e4ef",
"af8e6e91fab46ce4873e1a50a8ef448cc29121f7f74deef34a71ef89cc00d9274bc6c2454bbb3230d8b2ec94c62b1dec85f3593bfa30ea6f7a44d7c09465a253",
"29fd384ed4906f2d13aa9fe7af905990938bed807f1832454a372ab412eea1f5625a1fcc9ac8343b7c67c5aba6e0b1cc4644654913692c6b39eb9187ceacd3ec",
"a268c7885d9874a51c44dffed8ea53e94f78456e0b2ed99ff5a3924760813826d960a15edbedbb5de5226ba4b074e71b05c55b9756bb79e55c02754c2c7b6c8a",
"0cf8545488d56a86817cd7ecb10f7116b7ea530a45b6ea497b6c72c997e09e3d0da8698f46bb006fc977c2cd3d1177463ac9057fdd1662c85d0c126443c10473",
"b39614268fdd8781515e2cfebf89b4d5402bab10c226e6344e6b9ae000fb0d6c79cb2f3ec80e80eaeb1980d2f8698916bd2e9f747236655116649cd3ca23a837",
"74bef092fc6f1e5dba3663a3fb003b2a5ba257496536d99f62b9d73f8f9eb3ce9ff3eec709eb883655ec9eb896b9128f2afc89cf7d1ab58a72f4a3bf034d2b4a",
"3a988d38d75611f3ef38b8774980b33e573b6c57bee0469ba5eed9b44f29945e7347967fba2c162e1c3be7f310f2f75ee2381e7bfd6b3f0baea8d95dfb1dafb1",
"58aedfce6f67ddc85a28c992f1c0bd0969f041e66f1ee88020a125cbfcfebcd61709c9c4eba192c15e69f020d462486019fa8dea0cd7a42921a19d2fe546d43d",
"9347bd291473e6b4e368437b8e561e065f649a6d8ada479ad09b1999a8f26b91cf6120fd3bfe014e83f23acfa4c0ad7b3712b2c3c0733270663112ccd9285cd9",
"b32163e7c5dbb5f51fdc11d2eac875efbbcb7e7699090a7e7ff8a8d50795af5d74d9ff98543ef8cdf89ac13d0485278756e0ef00c817745661e1d59fe38e7537",
"1085d78307b1c4b008c57a2e7e5b234658a0a82e4ff1e4aaac72b312fda0fe27d233bc5b10e9cc17fdc7697b540c7d95eb215a19a1a0e20e1abfa126efd568c7",
"4e5c734c7dde011d83eac2b7347b373594f92d7091b9ca34cb9c6f39bdf5a8d2f134379e16d822f6522170ccf2ddd55c84b9e6c64fc927ac4cf8dfb2a17701f2",
"695d83bd990a1117b3d0ce06cc888027d12a054c2677fd82f0d4fbfc93575523e7991a5e35a3752e9b70ce62992e268a877744cdd435f5f130869c9a2074b338",
"a6213743568e3b3158b9184301f3690847554c68457cb40fc9a4b8cfd8d4a118c301a07737aeda0f929c68913c5f51c80394f53bff1c3e83b2e40ca97eba9e15",
"d444bfa2362a96df213d070e33fa841f51334e4e76866b8139e8af3bb3398be2dfaddcbc56b9146de9f68118dc5829e74b0c28d7711907b121f9161cb92b69a9",
"142709d62e28fcccd0af97fad0f8465b971e82201dc51070faa0372aa43e92484be1c1e73ba10906d5d1853db6a4106e0a7bf9800d373d6dee2d46d62ef2a461",
}