From e10934a88e6a6825ce8b77bd63d0444907edf3f1 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 22 Jun 2016 17:13:26 -0700 Subject: [PATCH] bitrot: Start using blake2b algorithm and remove sha512 usage. (#1957) Fixes #1952 --- docs/backend/xl/xl.json | 8 +- erasure-createfile.go | 2 +- erasure-utils.go | 11 +- pkg/crypto/sha512/LICENSE | 202 --- .../sha512/sha512-avx-asm_linux_amd64.S | 686 -------- .../sha512/sha512-avx2-asm_linux_amd64.S | 1006 ------------ .../sha512/sha512-ssse3-asm_linux_amd64.S | 686 -------- pkg/crypto/sha512/sha512.go | 41 - pkg/crypto/sha512/sha512_linux.go | 166 -- pkg/crypto/sha512/sha512_test.go | 141 -- pkg/crypto/sha512/sha512block.go | 181 --- vendor/github.com/dchest/blake2b/README | 23 + vendor/github.com/dchest/blake2b/blake2b.go | 299 ++++ vendor/github.com/dchest/blake2b/block.go | 1420 +++++++++++++++++ vendor/vendor.json | 5 + 15 files changed, 1758 insertions(+), 3119 deletions(-) delete mode 100644 pkg/crypto/sha512/LICENSE delete mode 100644 pkg/crypto/sha512/sha512-avx-asm_linux_amd64.S delete mode 100644 pkg/crypto/sha512/sha512-avx2-asm_linux_amd64.S delete mode 100644 pkg/crypto/sha512/sha512-ssse3-asm_linux_amd64.S delete mode 100644 pkg/crypto/sha512/sha512.go delete mode 100644 pkg/crypto/sha512/sha512_linux.go delete mode 100644 pkg/crypto/sha512/sha512_test.go delete mode 100644 pkg/crypto/sha512/sha512block.go create mode 100644 vendor/github.com/dchest/blake2b/README create mode 100644 vendor/github.com/dchest/blake2b/blake2b.go create mode 100644 vendor/github.com/dchest/blake2b/block.go diff --git a/docs/backend/xl/xl.json b/docs/backend/xl/xl.json index 333b984ef..08db8c07c 100644 --- a/docs/backend/xl/xl.json +++ b/docs/backend/xl/xl.json @@ -37,13 +37,13 @@ "checksum": [ { "name": "object1", - "algorithm": "sha512", - "hash": "d9910e1492446389cfae6fe979db0245f96ca97ca2c7a25cab45805882004479320d866a47ea1f7be6a62625dd4de6caf7816009ef9d62779346d01a221b335c", + "algorithm": "blake2b", + "hash": "173c2cda7fb9e2798f91ba10135b65475fd2b97684355b328941d1c470a37549502672b2b4630c8268d5e5f79c8eb2fe433a25c6368f31d75b2c0504e3104c0e", }, { "name": "object2", - "algorithm": "sha512", - "hash": "d9910e1492446389cfae6fe979db0245f96ca97ca2c7a25cab45805882004479320d866a47ea1f7be6a62625dd4de6caf7816009ef9d62779346d01a221b335c", + "algorithm": "blake2b", + "hash": "14f9ba0006b2db7cd171507e79032ad3a34eab7df02b6564c487614b6d1e6613343a4f56074aa3473ac9c3b26e00cbbeae937d478cca4cfb138e72838ebb0826", }, ], }, diff --git a/erasure-createfile.go b/erasure-createfile.go index edc1332fc..8ae84e902 100644 --- a/erasure-createfile.go +++ b/erasure-createfile.go @@ -79,7 +79,7 @@ func erasureCreateFile(disks []StorageAPI, volume string, path string, partName blockIndex := eInfo.Distribution[index] - 1 checkSums[blockIndex] = checkSumInfo{ Name: partName, - Algorithm: "sha512", + Algorithm: "blake2b", Hash: hex.EncodeToString(hashWriters[blockIndex].Sum(nil)), } } diff --git a/erasure-utils.go b/erasure-utils.go index 9daf39325..fec8e0d02 100644 --- a/erasure-utils.go +++ b/erasure-utils.go @@ -18,10 +18,10 @@ package main import ( "bytes" - "crypto/sha512" "hash" "io" + "github.com/dchest/blake2b" "github.com/klauspost/reedsolomon" ) @@ -29,7 +29,7 @@ import ( func newHashWriters(diskCount int) []hash.Hash { hashWriters := make([]hash.Hash, diskCount) for index := range hashWriters { - hashWriters[index] = newHash("sha512") + hashWriters[index] = newHash("blake2b") } return hashWriters } @@ -37,11 +37,12 @@ func newHashWriters(diskCount int) []hash.Hash { // 
newHash - gives you a newly allocated hash depending on the input algorithm. func newHash(algo string) hash.Hash { switch algo { - case "sha512": - return sha512.New() + case "blake2b": + return blake2b.New512() // Add new hashes here. default: - return sha512.New() + // Default to blake2b. + return blake2b.New512() } } diff --git a/pkg/crypto/sha512/LICENSE b/pkg/crypto/sha512/LICENSE deleted file mode 100644 index d64569567..000000000 --- a/pkg/crypto/sha512/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/pkg/crypto/sha512/sha512-avx-asm_linux_amd64.S b/pkg/crypto/sha512/sha512-avx-asm_linux_amd64.S deleted file mode 100644 index 61971f2de..000000000 --- a/pkg/crypto/sha512/sha512-avx-asm_linux_amd64.S +++ /dev/null @@ -1,686 +0,0 @@ -######################################################################## -# Implement fast SHA-512 with AVX instructions. (x86_64) -# -# Copyright (C) 2013 Intel Corporation. -# -# Authors: -# James Guilford -# Kirk Yap -# David Cote -# Tim Chen -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# OpenIB.org BSD license below: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# - Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# -######################################################################## -# -# This code is described in an Intel White-Paper: -# "Fast SHA-512 Implementations on Intel Architecture Processors" -# -# To find it, surf to http://www.intel.com/p/en_US/embedded -# and search for that title. 
-# -######################################################################## -# Using this part of Minio codebase under the license -# Apache License Version 2.0 with modifications -## - -#ifdef HAS_AVX -#ifndef ENTRY -#define ENTRY(name) \ - .globl name ; \ - .align 4,0x90 ; \ - name: -#endif - -#ifndef END -#define END(name) \ - .size name, .-name -#endif - -#ifndef ENDPROC -#define ENDPROC(name) \ - .type name, @function ; \ - END(name) -#endif - -#define NUM_INVALID 100 - -#define TYPE_R32 0 -#define TYPE_R64 1 -#define TYPE_XMM 2 -#define TYPE_INVALID 100 - - .macro R32_NUM opd r32 - \opd = NUM_INVALID - .ifc \r32,%eax - \opd = 0 - .endif - .ifc \r32,%ecx - \opd = 1 - .endif - .ifc \r32,%edx - \opd = 2 - .endif - .ifc \r32,%ebx - \opd = 3 - .endif - .ifc \r32,%esp - \opd = 4 - .endif - .ifc \r32,%ebp - \opd = 5 - .endif - .ifc \r32,%esi - \opd = 6 - .endif - .ifc \r32,%edi - \opd = 7 - .endif -#ifdef X86_64 - .ifc \r32,%r8d - \opd = 8 - .endif - .ifc \r32,%r9d - \opd = 9 - .endif - .ifc \r32,%r10d - \opd = 10 - .endif - .ifc \r32,%r11d - \opd = 11 - .endif - .ifc \r32,%r12d - \opd = 12 - .endif - .ifc \r32,%r13d - \opd = 13 - .endif - .ifc \r32,%r14d - \opd = 14 - .endif - .ifc \r32,%r15d - \opd = 15 - .endif -#endif - .endm - - .macro R64_NUM opd r64 - \opd = NUM_INVALID -#ifdef X86_64 - .ifc \r64,%rax - \opd = 0 - .endif - .ifc \r64,%rcx - \opd = 1 - .endif - .ifc \r64,%rdx - \opd = 2 - .endif - .ifc \r64,%rbx - \opd = 3 - .endif - .ifc \r64,%rsp - \opd = 4 - .endif - .ifc \r64,%rbp - \opd = 5 - .endif - .ifc \r64,%rsi - \opd = 6 - .endif - .ifc \r64,%rdi - \opd = 7 - .endif - .ifc \r64,%r8 - \opd = 8 - .endif - .ifc \r64,%r9 - \opd = 9 - .endif - .ifc \r64,%r10 - \opd = 10 - .endif - .ifc \r64,%r11 - \opd = 11 - .endif - .ifc \r64,%r12 - \opd = 12 - .endif - .ifc \r64,%r13 - \opd = 13 - .endif - .ifc \r64,%r14 - \opd = 14 - .endif - .ifc \r64,%r15 - \opd = 15 - .endif -#endif - .endm - - .macro XMM_NUM opd xmm - \opd = NUM_INVALID - .ifc \xmm,%xmm0 - \opd = 0 - .endif - .ifc \xmm,%xmm1 - \opd = 1 - .endif - .ifc \xmm,%xmm2 - \opd = 2 - .endif - .ifc \xmm,%xmm3 - \opd = 3 - .endif - .ifc \xmm,%xmm4 - \opd = 4 - .endif - .ifc \xmm,%xmm5 - \opd = 5 - .endif - .ifc \xmm,%xmm6 - \opd = 6 - .endif - .ifc \xmm,%xmm7 - \opd = 7 - .endif - .ifc \xmm,%xmm8 - \opd = 8 - .endif - .ifc \xmm,%xmm9 - \opd = 9 - .endif - .ifc \xmm,%xmm10 - \opd = 10 - .endif - .ifc \xmm,%xmm11 - \opd = 11 - .endif - .ifc \xmm,%xmm12 - \opd = 12 - .endif - .ifc \xmm,%xmm13 - \opd = 13 - .endif - .ifc \xmm,%xmm14 - \opd = 14 - .endif - .ifc \xmm,%xmm15 - \opd = 15 - .endif - .endm - - .macro TYPE type reg - R32_NUM reg_type_r32 \reg - R64_NUM reg_type_r64 \reg - XMM_NUM reg_type_xmm \reg - .if reg_type_r64 <> NUM_INVALID - \type = TYPE_R64 - .elseif reg_type_r32 <> NUM_INVALID - \type = TYPE_R32 - .elseif reg_type_xmm <> NUM_INVALID - \type = TYPE_XMM - .else - \type = TYPE_INVALID - .endif - .endm - - .macro PFX_OPD_SIZE - .byte 0x66 - .endm - - .macro PFX_REX opd1 opd2 W=0 - .if ((\opd1 | \opd2) & 8) || \W - .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) - .endif - .endm - - .macro MODRM mod opd1 opd2 - .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) - .endm - - .macro PSHUFB_XMM xmm1 xmm2 - XMM_NUM pshufb_opd1 \xmm1 - XMM_NUM pshufb_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX pshufb_opd1 pshufb_opd2 - .byte 0x0f, 0x38, 0x00 - MODRM 0xc0 pshufb_opd1 pshufb_opd2 - .endm - - .macro PCLMULQDQ imm8 xmm1 xmm2 - XMM_NUM clmul_opd1 \xmm1 - XMM_NUM clmul_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX clmul_opd1 clmul_opd2 - 
.byte 0x0f, 0x3a, 0x44 - MODRM 0xc0 clmul_opd1 clmul_opd2 - .byte \imm8 - .endm - - .macro PEXTRD imm8 xmm gpr - R32_NUM extrd_opd1 \gpr - XMM_NUM extrd_opd2 \xmm - PFX_OPD_SIZE - PFX_REX extrd_opd1 extrd_opd2 - .byte 0x0f, 0x3a, 0x16 - MODRM 0xc0 extrd_opd1 extrd_opd2 - .byte \imm8 - .endm - - .macro MOVQ_R64_XMM opd1 opd2 - TYPE movq_r64_xmm_opd1_type \opd1 - .if movq_r64_xmm_opd1_type == TYPE_XMM - XMM_NUM movq_r64_xmm_opd1 \opd1 - R64_NUM movq_r64_xmm_opd2 \opd2 - .else - R64_NUM movq_r64_xmm_opd1 \opd1 - XMM_NUM movq_r64_xmm_opd2 \opd2 - .endif - PFX_OPD_SIZE - PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 - .if movq_r64_xmm_opd1_type == TYPE_XMM - .byte 0x0f, 0x7e - .else - .byte 0x0f, 0x6e - .endif - MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 - .endm - -.text - -# Virtual Registers -# ARG1 -msg = %rdi -# ARG2 -digest = %rsi -# ARG3 -msglen = %rdx -T1 = %rcx -T2 = %r8 -a_64 = %r9 -b_64 = %r10 -c_64 = %r11 -d_64 = %r12 -e_64 = %r13 -f_64 = %r14 -g_64 = %r15 -h_64 = %rbx -tmp0 = %rax - -# Local variables (stack frame) - -# Message Schedule -W_SIZE = 80*8 -# W[t] + K[t] | W[t+1] + K[t+1] -WK_SIZE = 2*8 -RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 5*8 - -frame_W = 0 -frame_WK = frame_W + W_SIZE -frame_RSPSAVE = frame_WK + WK_SIZE -frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE -frame_size = frame_GPRSAVE + GPRSAVE_SIZE - -# Useful QWORD "arrays" for simpler memory references -# MSG, DIGEST, K_t, W_t are arrays -# WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even - -# Input message (arg1) -#define MSG(i) 8*i(msg) - -# Output Digest (arg2) -#define DIGEST(i) 8*i(digest) - -# SHA Constants (static mem) -#define K_t(i) 8*i+K512(%rip) - -# Message Schedule (stack frame) -#define W_t(i) 8*i+frame_W(%rsp) - -# W[t]+K[t] (stack frame) -#define WK_2(i) 8*((i%2))+frame_WK(%rsp) - -.macro RotateState - # Rotate symbols a..h right - TMP = h_64 - h_64 = g_64 - g_64 = f_64 - f_64 = e_64 - e_64 = d_64 - d_64 = c_64 - c_64 = b_64 - b_64 = a_64 - a_64 = TMP -.endm - -.macro RORQ p1 p2 - # shld is faster than ror on Sandybridge - shld $(64-\p2), \p1, \p1 -.endm - -.macro SHA512_Round rnd - # Compute Round %%t - mov f_64, T1 # T1 = f - mov e_64, tmp0 # tmp = e - xor g_64, T1 # T1 = f ^ g - RORQ tmp0, 23 # 41 # tmp = e ror 23 - and e_64, T1 # T1 = (f ^ g) & e - xor e_64, tmp0 # tmp = (e ror 23) ^ e - xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g) - idx = \rnd - add WK_2(idx), T1 # W[t] + K[t] from message scheduler - RORQ tmp0, 4 # 18 # tmp = ((e ror 23) ^ e) ror 4 - xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e - mov a_64, T2 # T2 = a - add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h - RORQ tmp0, 14 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) - add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e) - mov a_64, tmp0 # tmp = a - xor c_64, T2 # T2 = a ^ c - and c_64, tmp0 # tmp = a & c - and b_64, T2 # T2 = (a ^ c) & b - xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) - mov a_64, tmp0 # tmp = a - RORQ tmp0, 5 # 39 # tmp = a ror 5 - xor a_64, tmp0 # tmp = (a ror 5) ^ a - add T1, d_64 # e(next_state) = d + T1 - RORQ tmp0, 6 # 34 # tmp = ((a ror 5) ^ a) ror 6 - xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a - lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c) - RORQ tmp0, 28 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) - add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) S0(a) - RotateState -.endm - -.macro SHA512_2Sched_2Round_avx rnd - # Compute rounds t-2 and t-1 - # Compute message schedule QWORDS t and t+1 - - # Two rounds are computed based on the values for 
K[t-2]+W[t-2] and - # K[t-1]+W[t-1] which were previously stored at WK_2 by the message - # scheduler. - # The two new schedule QWORDS are stored at [W_t(t)] and [W_t(t+1)]. - # They are then added to their respective SHA512 constants at - # [K_t(t)] and [K_t(t+1)] and stored at dqword [WK_2(t)] - # For brievity, the comments following vectored instructions only refer to - # the first of a pair of QWORDS. - # Eg. XMM4=W[t-2] really means XMM4={W[t-2]|W[t-1]} - # The computation of the message schedule and the rounds are tightly - # stitched to take advantage of instruction-level parallelism. - - idx = \rnd - 2 - vmovdqa W_t(idx), %xmm4 # XMM4 = W[t-2] - idx = \rnd - 15 - vmovdqu W_t(idx), %xmm5 # XMM5 = W[t-15] - mov f_64, T1 - vpsrlq $61, %xmm4, %xmm0 # XMM0 = W[t-2]>>61 - mov e_64, tmp0 - vpsrlq $1, %xmm5, %xmm6 # XMM6 = W[t-15]>>1 - xor g_64, T1 - RORQ tmp0, 23 # 41 - vpsrlq $19, %xmm4, %xmm1 # XMM1 = W[t-2]>>19 - and e_64, T1 - xor e_64, tmp0 - vpxor %xmm1, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 - xor g_64, T1 - idx = \rnd - add WK_2(idx), T1# - vpsrlq $8, %xmm5, %xmm7 # XMM7 = W[t-15]>>8 - RORQ tmp0, 4 # 18 - vpsrlq $6, %xmm4, %xmm2 # XMM2 = W[t-2]>>6 - xor e_64, tmp0 - mov a_64, T2 - add h_64, T1 - vpxor %xmm7, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 - RORQ tmp0, 14 # 14 - add tmp0, T1 - vpsrlq $7, %xmm5, %xmm8 # XMM8 = W[t-15]>>7 - mov a_64, tmp0 - xor c_64, T2 - vpsllq $(64-61), %xmm4, %xmm3 # XMM3 = W[t-2]<<3 - and c_64, tmp0 - and b_64, T2 - vpxor %xmm3, %xmm2, %xmm2 # XMM2 = W[t-2]>>6 ^ W[t-2]<<3 - xor tmp0, T2 - mov a_64, tmp0 - vpsllq $(64-1), %xmm5, %xmm9 # XMM9 = W[t-15]<<63 - RORQ tmp0, 5 # 39 - vpxor %xmm9, %xmm8, %xmm8 # XMM8 = W[t-15]>>7 ^ W[t-15]<<63 - xor a_64, tmp0 - add T1, d_64 - RORQ tmp0, 6 # 34 - xor a_64, tmp0 - vpxor %xmm8, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 ^ - # W[t-15]>>7 ^ W[t-15]<<63 - lea (T1, T2), h_64 - RORQ tmp0, 28 # 28 - vpsllq $(64-19), %xmm4, %xmm4 # XMM4 = W[t-2]<<25 - add tmp0, h_64 - RotateState - vpxor %xmm4, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 ^ - # W[t-2]<<25 - mov f_64, T1 - vpxor %xmm2, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) - mov e_64, tmp0 - xor g_64, T1 - idx = \rnd - 16 - vpaddq W_t(idx), %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] - idx = \rnd - 7 - vmovdqu W_t(idx), %xmm1 # XMM1 = W[t-7] - RORQ tmp0, 23 # 41 - and e_64, T1 - xor e_64, tmp0 - xor g_64, T1 - vpsllq $(64-8), %xmm5, %xmm5 # XMM5 = W[t-15]<<56 - idx = \rnd + 1 - add WK_2(idx), T1 - vpxor %xmm5, %xmm6, %xmm6 # XMM6 = s0(W[t-15]) - RORQ tmp0, 4 # 18 - vpaddq %xmm6, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] + s0(W[t-15]) - xor e_64, tmp0 - vpaddq %xmm1, %xmm0, %xmm0 # XMM0 = W[t] = s1(W[t-2]) + W[t-7] + - # s0(W[t-15]) + W[t-16] - mov a_64, T2 - add h_64, T1 - RORQ tmp0, 14 # 14 - add tmp0, T1 - idx = \rnd - vmovdqa %xmm0, W_t(idx) # Store W[t] - vpaddq K_t(idx), %xmm0, %xmm0 # Compute W[t]+K[t] - vmovdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds - mov a_64, tmp0 - xor c_64, T2 - and c_64, tmp0 - and b_64, T2 - xor tmp0, T2 - mov a_64, tmp0 - RORQ tmp0, 5 # 39 - xor a_64, tmp0 - add T1, d_64 - RORQ tmp0, 6 # 34 - xor a_64, tmp0 - lea (T1, T2), h_64 - RORQ tmp0, 28 # 28 - add tmp0, h_64 - RotateState -.endm - -######################################################################## -# void sha512_transform_avx(const void* M, void* D, u64 L) -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. 
-# L is the message length in SHA512 blocks -######################################################################## -ENTRY(sha512_transform_avx) - cmp $0, msglen - je nowork - - # Allocate Stack Space - mov %rsp, %rax - sub $frame_size, %rsp - and $~(0x20 - 1), %rsp - mov %rax, frame_RSPSAVE(%rsp) - - # Save GPRs - mov %rbx, frame_GPRSAVE(%rsp) - mov %r12, frame_GPRSAVE +8*1(%rsp) - mov %r13, frame_GPRSAVE +8*2(%rsp) - mov %r14, frame_GPRSAVE +8*3(%rsp) - mov %r15, frame_GPRSAVE +8*4(%rsp) - -updateblock: - - # Load state variables - mov DIGEST(0), a_64 - mov DIGEST(1), b_64 - mov DIGEST(2), c_64 - mov DIGEST(3), d_64 - mov DIGEST(4), e_64 - mov DIGEST(5), f_64 - mov DIGEST(6), g_64 - mov DIGEST(7), h_64 - - t = 0 - .rept 80/2 + 1 - # (80 rounds) / (2 rounds/iteration) + (1 iteration) - # +1 iteration because the scheduler leads hashing by 1 iteration - .if t < 2 - # BSWAP 2 QWORDS - vmovdqa XMM_QWORD_BSWAP(%rip), %xmm1 - vmovdqu MSG(t), %xmm0 - vpshufb %xmm1, %xmm0, %xmm0 # BSWAP - vmovdqa %xmm0, W_t(t) # Store Scheduled Pair - vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t] - vmovdqa %xmm0, WK_2(t) # Store into WK for rounds - .elseif t < 16 - # BSWAP 2 QWORDS# Compute 2 Rounds - vmovdqu MSG(t), %xmm0 - vpshufb %xmm1, %xmm0, %xmm0 # BSWAP - SHA512_Round t-2 # Round t-2 - vmovdqa %xmm0, W_t(t) # Store Scheduled Pair - vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t] - SHA512_Round t-1 # Round t-1 - vmovdqa %xmm0, WK_2(t)# Store W[t]+K[t] into WK - .elseif t < 79 - # Schedule 2 QWORDS# Compute 2 Rounds - SHA512_2Sched_2Round_avx t - .else - # Compute 2 Rounds - SHA512_Round t-2 - SHA512_Round t-1 - .endif - t = t+2 - .endr - - # Update digest - add a_64, DIGEST(0) - add b_64, DIGEST(1) - add c_64, DIGEST(2) - add d_64, DIGEST(3) - add e_64, DIGEST(4) - add f_64, DIGEST(5) - add g_64, DIGEST(6) - add h_64, DIGEST(7) - - # Advance to next message block - add $16*8, msg - dec msglen - jnz updateblock - - # Restore GPRs - mov frame_GPRSAVE(%rsp), %rbx - mov frame_GPRSAVE +8*1(%rsp), %r12 - mov frame_GPRSAVE +8*2(%rsp), %r13 - mov frame_GPRSAVE +8*3(%rsp), %r14 - mov frame_GPRSAVE +8*4(%rsp), %r15 - - # Restore Stack Pointer - mov frame_RSPSAVE(%rsp), %rsp - -nowork: - ret -ENDPROC(sha512_transform_avx) - -######################################################################## -### Binary Data - -.data - -.align 16 - -# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. 
-XMM_QWORD_BSWAP: - .octa 0x08090a0b0c0d0e0f0001020304050607 - -# K[t] used in SHA512 hashing -K512: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -#endif diff --git a/pkg/crypto/sha512/sha512-avx2-asm_linux_amd64.S b/pkg/crypto/sha512/sha512-avx2-asm_linux_amd64.S deleted file mode 100644 index b3a0c66b5..000000000 --- a/pkg/crypto/sha512/sha512-avx2-asm_linux_amd64.S +++ /dev/null @@ -1,1006 +0,0 @@ -######################################################################## -# Implement fast SHA-512 with AVX2 instructions. (x86_64) -# -# Copyright (C) 2013 Intel Corporation. -# -# Authors: -# James Guilford -# Kirk Yap -# David Cote -# Tim Chen -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# OpenIB.org BSD license below: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# - Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# -######################################################################## -# -# This code is described in an Intel White-Paper: -# "Fast SHA-512 Implementations on Intel Architecture Processors" -# -# To find it, surf to http://www.intel.com/p/en_US/embedded -# and search for that title. -# -######################################################################## -# This code schedules 1 blocks at a time, with 4 lanes per block -######################################################################## -# Using this part of Minio codebase under the license -# Apache License Version 2.0 with modifications -## - -#ifdef HAS_AVX2 -#ifndef ENTRY -#define ENTRY(name) \ - .globl name ; \ - .align 4,0x90 ; \ - name: -#endif - -#ifndef END -#define END(name) \ - .size name, .-name -#endif - -#ifndef ENDPROC -#define ENDPROC(name) \ - .type name, @function ; \ - END(name) -#endif - -#define NUM_INVALID 100 - -#define TYPE_R32 0 -#define TYPE_R64 1 -#define TYPE_XMM 2 -#define TYPE_INVALID 100 - - .macro R32_NUM opd r32 - \opd = NUM_INVALID - .ifc \r32,%eax - \opd = 0 - .endif - .ifc \r32,%ecx - \opd = 1 - .endif - .ifc \r32,%edx - \opd = 2 - .endif - .ifc \r32,%ebx - \opd = 3 - .endif - .ifc \r32,%esp - \opd = 4 - .endif - .ifc \r32,%ebp - \opd = 5 - .endif - .ifc \r32,%esi - \opd = 6 - .endif - .ifc \r32,%edi - \opd = 7 - .endif -#ifdef X86_64 - .ifc \r32,%r8d - \opd = 8 - .endif - .ifc \r32,%r9d - \opd = 9 - .endif - .ifc \r32,%r10d - \opd = 10 - .endif - .ifc \r32,%r11d - \opd = 11 - .endif - .ifc \r32,%r12d - \opd = 12 - .endif - .ifc \r32,%r13d - \opd = 13 - .endif - .ifc \r32,%r14d - \opd = 14 - .endif - .ifc \r32,%r15d - \opd = 15 - .endif -#endif - .endm - - .macro R64_NUM opd r64 - \opd = NUM_INVALID -#ifdef X86_64 - .ifc \r64,%rax - \opd = 0 - .endif - .ifc \r64,%rcx - \opd = 1 - .endif - .ifc \r64,%rdx - \opd = 2 - .endif - .ifc \r64,%rbx - \opd = 3 - .endif - .ifc \r64,%rsp - \opd = 4 - .endif - .ifc \r64,%rbp - \opd = 5 - .endif - .ifc \r64,%rsi - \opd = 6 - .endif - .ifc \r64,%rdi - \opd = 7 - .endif - .ifc \r64,%r8 - \opd = 8 - .endif - .ifc \r64,%r9 - \opd = 9 - .endif - .ifc \r64,%r10 - \opd = 10 - .endif - .ifc \r64,%r11 - \opd = 11 - .endif - .ifc \r64,%r12 - \opd = 12 - .endif - .ifc \r64,%r13 - \opd = 13 - .endif - .ifc \r64,%r14 - \opd = 14 - .endif - .ifc \r64,%r15 - \opd = 15 - .endif -#endif - .endm - - .macro XMM_NUM opd xmm - \opd = NUM_INVALID - .ifc \xmm,%xmm0 - \opd = 0 - .endif - .ifc \xmm,%xmm1 - \opd = 1 - .endif - .ifc \xmm,%xmm2 - \opd = 2 - .endif - .ifc \xmm,%xmm3 - \opd = 3 - .endif - .ifc \xmm,%xmm4 - \opd = 4 - .endif - .ifc \xmm,%xmm5 - \opd = 5 - .endif - .ifc \xmm,%xmm6 - \opd = 6 - .endif - .ifc \xmm,%xmm7 - \opd = 7 - .endif - .ifc \xmm,%xmm8 - \opd = 8 - .endif - .ifc \xmm,%xmm9 - \opd = 9 - .endif - .ifc \xmm,%xmm10 - \opd = 10 - .endif - .ifc \xmm,%xmm11 - \opd = 11 - .endif - .ifc \xmm,%xmm12 - \opd = 12 - .endif - .ifc \xmm,%xmm13 - \opd = 13 - .endif - .ifc \xmm,%xmm14 - \opd = 14 - .endif - .ifc \xmm,%xmm15 - \opd = 15 - .endif - .endm - - .macro TYPE type reg - R32_NUM reg_type_r32 \reg - R64_NUM reg_type_r64 \reg - XMM_NUM reg_type_xmm \reg - .if reg_type_r64 <> NUM_INVALID - \type = TYPE_R64 - .elseif reg_type_r32 <> NUM_INVALID - \type = TYPE_R32 - .elseif reg_type_xmm <> 
NUM_INVALID - \type = TYPE_XMM - .else - \type = TYPE_INVALID - .endif - .endm - - .macro PFX_OPD_SIZE - .byte 0x66 - .endm - - .macro PFX_REX opd1 opd2 W=0 - .if ((\opd1 | \opd2) & 8) || \W - .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) - .endif - .endm - - .macro MODRM mod opd1 opd2 - .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) - .endm - - .macro PSHUFB_XMM xmm1 xmm2 - XMM_NUM pshufb_opd1 \xmm1 - XMM_NUM pshufb_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX pshufb_opd1 pshufb_opd2 - .byte 0x0f, 0x38, 0x00 - MODRM 0xc0 pshufb_opd1 pshufb_opd2 - .endm - - .macro PCLMULQDQ imm8 xmm1 xmm2 - XMM_NUM clmul_opd1 \xmm1 - XMM_NUM clmul_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX clmul_opd1 clmul_opd2 - .byte 0x0f, 0x3a, 0x44 - MODRM 0xc0 clmul_opd1 clmul_opd2 - .byte \imm8 - .endm - - .macro PEXTRD imm8 xmm gpr - R32_NUM extrd_opd1 \gpr - XMM_NUM extrd_opd2 \xmm - PFX_OPD_SIZE - PFX_REX extrd_opd1 extrd_opd2 - .byte 0x0f, 0x3a, 0x16 - MODRM 0xc0 extrd_opd1 extrd_opd2 - .byte \imm8 - .endm - - .macro MOVQ_R64_XMM opd1 opd2 - TYPE movq_r64_xmm_opd1_type \opd1 - .if movq_r64_xmm_opd1_type == TYPE_XMM - XMM_NUM movq_r64_xmm_opd1 \opd1 - R64_NUM movq_r64_xmm_opd2 \opd2 - .else - R64_NUM movq_r64_xmm_opd1 \opd1 - XMM_NUM movq_r64_xmm_opd2 \opd2 - .endif - PFX_OPD_SIZE - PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 - .if movq_r64_xmm_opd1_type == TYPE_XMM - .byte 0x0f, 0x7e - .else - .byte 0x0f, 0x6e - .endif - MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 - .endm - -.text - -# Virtual Registers -Y_0 = %ymm4 -Y_1 = %ymm5 -Y_2 = %ymm6 -Y_3 = %ymm7 - -YTMP0 = %ymm0 -YTMP1 = %ymm1 -YTMP2 = %ymm2 -YTMP3 = %ymm3 -YTMP4 = %ymm8 -XFER = YTMP0 - -BYTE_FLIP_MASK = %ymm9 - -# 1st arg -INP = %rdi -# 2nd arg -CTX = %rsi -# 3rd arg -NUM_BLKS = %rdx - -c = %rcx -d = %r8 -e = %rdx -y3 = %rdi - -TBL = %rbp - -a = %rax -b = %rbx - -f = %r9 -g = %r10 -h = %r11 -old_h = %r11 - -T1 = %r12 -y0 = %r13 -y1 = %r14 -y2 = %r15 - -y4 = %r12 - -# Local variables (stack frame) -XFER_SIZE = 4*8 -SRND_SIZE = 1*8 -INP_SIZE = 1*8 -INPEND_SIZE = 1*8 -RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 6*8 - -frame_XFER = 0 -frame_SRND = frame_XFER + XFER_SIZE -frame_INP = frame_SRND + SRND_SIZE -frame_INPEND = frame_INP + INP_SIZE -frame_RSPSAVE = frame_INPEND + INPEND_SIZE -frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE -frame_size = frame_GPRSAVE + GPRSAVE_SIZE - -## assume buffers not aligned -#define VMOVDQ vmovdqu - -# addm [mem], reg -# Add reg to mem using reg-mem add and store -.macro addm p1 p2 - add \p1, \p2 - mov \p2, \p1 -.endm - - -# COPY_YMM_AND_BSWAP ymm, [mem], byte_flip_mask -# Load ymm with mem and byte swap each dword -.macro COPY_YMM_AND_BSWAP p1 p2 p3 - VMOVDQ \p2, \p1 - vpshufb \p3, \p1, \p1 -.endm -# rotate_Ys -# Rotate values of symbols Y0...Y3 -.macro rotate_Ys - Y_ = Y_0 - Y_0 = Y_1 - Y_1 = Y_2 - Y_2 = Y_3 - Y_3 = Y_ -.endm - -# RotateState -.macro RotateState - # Rotate symbols a..h right - old_h = h - TMP_ = h - h = g - g = f - f = e - e = d - d = c - c = b - b = a - a = TMP_ -.endm - -# macro MY_VPALIGNR YDST, YSRC1, YSRC2, RVAL -# YDST = {YSRC1, YSRC2} >> RVAL*8 -.macro MY_VPALIGNR YDST YSRC1 YSRC2 RVAL - vperm2f128 $0x3, \YSRC2, \YSRC1, \YDST # YDST = {YS1_LO, YS2_HI} - vpalignr $\RVAL, \YSRC2, \YDST, \YDST # YDST = {YDS1, YS2} >> RVAL*8 -.endm - -.macro FOUR_ROUNDS_AND_SCHED -################################### RND N + 0 ######################################### - - # Extract w[t-7] - MY_VPALIGNR YTMP0, Y_3, Y_2, 8 # YTMP0 = W[-7] - # Calculate w[t-16] + w[t-7] - vpaddq Y_0, YTMP0, YTMP0 # YTMP0 = W[-7] + W[-16] - # 
Extract w[t-15] - MY_VPALIGNR YTMP1, Y_1, Y_0, 8 # YTMP1 = W[-15] - - # Calculate sigma0 - - # Calculate w[t-15] ror 1 - vpsrlq $1, YTMP1, YTMP2 - vpsllq $(64-1), YTMP1, YTMP3 - vpor YTMP2, YTMP3, YTMP3 # YTMP3 = W[-15] ror 1 - # Calculate w[t-15] shr 7 - vpsrlq $7, YTMP1, YTMP4 # YTMP4 = W[-15] >> 7 - - mov a, y3 # y3 = a # MAJA - rorx $41, e, y0 # y0 = e >> 41 # S1A - rorx $18, e, y1 # y1 = e >> 18 # S1B - add frame_XFER(%rsp),h # h = k + w + h # -- - or c, y3 # y3 = a|c # MAJA - mov f, y2 # y2 = f # CH - rorx $34, a, T1 # T1 = a >> 34 # S0B - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 - xor g, y2 # y2 = f^g # CH - rorx $14, e, y1 # y1 = (e >> 14) # S1 - - and e, y2 # y2 = (f^g)&e # CH - xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 - rorx $39, a, y1 # y1 = a >> 39 # S0A - add h, d # d = k + w + h + d # -- - - and b, y3 # y3 = (a|c)&b # MAJA - xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 - rorx $28, a, T1 # T1 = (a >> 28) # S0 - - xor g, y2 # y2 = CH = ((f^g)&e)^g # CH - xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 - mov a, T1 # T1 = a # MAJB - and c, T1 # T1 = a&c # MAJB - - add y0, y2 # y2 = S1 + CH # -- - or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ - add y1, h # h = k + w + h + S0 # -- - - add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- - - add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- - add y3, h # h = t1 + S0 + MAJ # -- - - RotateState - -################################### RND N + 1 ######################################### - - # Calculate w[t-15] ror 8 - vpsrlq $8, YTMP1, YTMP2 - vpsllq $(64-8), YTMP1, YTMP1 - vpor YTMP2, YTMP1, YTMP1 # YTMP1 = W[-15] ror 8 - # XOR the three components - vpxor YTMP4, YTMP3, YTMP3 # YTMP3 = W[-15] ror 1 ^ W[-15] >> 7 - vpxor YTMP1, YTMP3, YTMP1 # YTMP1 = s0 - - - # Add three components, w[t-16], w[t-7] and sigma0 - vpaddq YTMP1, YTMP0, YTMP0 # YTMP0 = W[-16] + W[-7] + s0 - # Move to appropriate lanes for calculating w[16] and w[17] - vperm2f128 $0x0, YTMP0, YTMP0, Y_0 # Y_0 = W[-16] + W[-7] + s0 {BABA} - # Move to appropriate lanes for calculating w[18] and w[19] - vpand MASK_YMM_LO(%rip), YTMP0, YTMP0 # YTMP0 = W[-16] + W[-7] + s0 {DC00} - - # Calculate w[16] and w[17] in both 128 bit lanes - - # Calculate sigma1 for w[16] and w[17] on both 128 bit lanes - vperm2f128 $0x11, Y_3, Y_3, YTMP2 # YTMP2 = W[-2] {BABA} - vpsrlq $6, YTMP2, YTMP4 # YTMP4 = W[-2] >> 6 {BABA} - - - mov a, y3 # y3 = a # MAJA - rorx $41, e, y0 # y0 = e >> 41 # S1A - rorx $18, e, y1 # y1 = e >> 18 # S1B - add 1*8+frame_XFER(%rsp), h # h = k + w + h # -- - or c, y3 # y3 = a|c # MAJA - - - mov f, y2 # y2 = f # CH - rorx $34, a, T1 # T1 = a >> 34 # S0B - xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 - xor g, y2 # y2 = f^g # CH - - - rorx $14, e, y1 # y1 = (e >> 14) # S1 - xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 - rorx $39, a, y1 # y1 = a >> 39 # S0A - and e, y2 # y2 = (f^g)&e # CH - add h, d # d = k + w + h + d # -- - - and b, y3 # y3 = (a|c)&b # MAJA - xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 - - rorx $28, a, T1 # T1 = (a >> 28) # S0 - xor g, y2 # y2 = CH = ((f^g)&e)^g # CH - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 - mov a, T1 # T1 = a # MAJB - and c, T1 # T1 = a&c # MAJB - add y0, y2 # y2 = S1 + CH # -- - - or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ - add y1, h # h = k + w + h + S0 # -- - - add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- - add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- - add y3, h # h = t1 + S0 + MAJ # -- - - RotateState - - -################################### RND N + 2 
######################################### - - vpsrlq $19, YTMP2, YTMP3 # YTMP3 = W[-2] >> 19 {BABA} - vpsllq $(64-19), YTMP2, YTMP1 # YTMP1 = W[-2] << 19 {BABA} - vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 19 {BABA} - vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {BABA} - vpsrlq $61, YTMP2, YTMP3 # YTMP3 = W[-2] >> 61 {BABA} - vpsllq $(64-61), YTMP2, YTMP1 # YTMP1 = W[-2] << 61 {BABA} - vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 61 {BABA} - vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = s1 = (W[-2] ror 19) ^ - # (W[-2] ror 61) ^ (W[-2] >> 6) {BABA} - - # Add sigma1 to the other compunents to get w[16] and w[17] - vpaddq YTMP4, Y_0, Y_0 # Y_0 = {W[1], W[0], W[1], W[0]} - - # Calculate sigma1 for w[18] and w[19] for upper 128 bit lane - vpsrlq $6, Y_0, YTMP4 # YTMP4 = W[-2] >> 6 {DC--} - - mov a, y3 # y3 = a # MAJA - rorx $41, e, y0 # y0 = e >> 41 # S1A - add 2*8+frame_XFER(%rsp), h # h = k + w + h # -- - - rorx $18, e, y1 # y1 = e >> 18 # S1B - or c, y3 # y3 = a|c # MAJA - mov f, y2 # y2 = f # CH - xor g, y2 # y2 = f^g # CH - - rorx $34, a, T1 # T1 = a >> 34 # S0B - xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 - and e, y2 # y2 = (f^g)&e # CH - - rorx $14, e, y1 # y1 = (e >> 14) # S1 - add h, d # d = k + w + h + d # -- - and b, y3 # y3 = (a|c)&b # MAJA - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 - rorx $39, a, y1 # y1 = a >> 39 # S0A - xor g, y2 # y2 = CH = ((f^g)&e)^g # CH - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 - rorx $28, a, T1 # T1 = (a >> 28) # S0 - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 - mov a, T1 # T1 = a # MAJB - and c, T1 # T1 = a&c # MAJB - add y0, y2 # y2 = S1 + CH # -- - - or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ - add y1, h # h = k + w + h + S0 # -- - add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- - add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- - - add y3, h # h = t1 + S0 + MAJ # -- - - RotateState - -################################### RND N + 3 ######################################### - - vpsrlq $19, Y_0, YTMP3 # YTMP3 = W[-2] >> 19 {DC--} - vpsllq $(64-19), Y_0, YTMP1 # YTMP1 = W[-2] << 19 {DC--} - vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 19 {DC--} - vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {DC--} - vpsrlq $61, Y_0, YTMP3 # YTMP3 = W[-2] >> 61 {DC--} - vpsllq $(64-61), Y_0, YTMP1 # YTMP1 = W[-2] << 61 {DC--} - vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 61 {DC--} - vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = s1 = (W[-2] ror 19) ^ - # (W[-2] ror 61) ^ (W[-2] >> 6) {DC--} - - # Add the sigma0 + w[t-7] + w[t-16] for w[18] and w[19] - # to newly calculated sigma1 to get w[18] and w[19] - vpaddq YTMP4, YTMP0, YTMP2 # YTMP2 = {W[3], W[2], --, --} - - # Form w[19, w[18], w17], w[16] - vpblendd $0xF0, YTMP2, Y_0, Y_0 # Y_0 = {W[3], W[2], W[1], W[0]} - - mov a, y3 # y3 = a # MAJA - rorx $41, e, y0 # y0 = e >> 41 # S1A - rorx $18, e, y1 # y1 = e >> 18 # S1B - add 3*8+frame_XFER(%rsp), h # h = k + w + h # -- - or c, y3 # y3 = a|c # MAJA - - - mov f, y2 # y2 = f # CH - rorx $34, a, T1 # T1 = a >> 34 # S0B - xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 - xor g, y2 # y2 = f^g # CH - - - rorx $14, e, y1 # y1 = (e >> 14) # S1 - and e, y2 # y2 = (f^g)&e # CH - add h, d # d = k + w + h + d # -- - and b, y3 # y3 = (a|c)&b # MAJA - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 - xor g, y2 # y2 = CH = ((f^g)&e)^g # CH - - rorx $39, a, y1 # y1 = a >> 39 # S0A - add y0, y2 # y2 = S1 + CH # -- - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 - add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- - - rorx $28, a, T1 # T1 = (a 
>> 28) # S0 - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 - mov a, T1 # T1 = a # MAJB - and c, T1 # T1 = a&c # MAJB - or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ - - add y1, h # h = k + w + h + S0 # -- - add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- - add y3, h # h = t1 + S0 + MAJ # -- - - RotateState - - rotate_Ys -.endm - -.macro DO_4ROUNDS - -################################### RND N + 0 ######################################### - - mov f, y2 # y2 = f # CH - rorx $41, e, y0 # y0 = e >> 41 # S1A - rorx $18, e, y1 # y1 = e >> 18 # S1B - xor g, y2 # y2 = f^g # CH - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 - rorx $14, e, y1 # y1 = (e >> 14) # S1 - and e, y2 # y2 = (f^g)&e # CH - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 - rorx $34, a, T1 # T1 = a >> 34 # S0B - xor g, y2 # y2 = CH = ((f^g)&e)^g # CH - rorx $39, a, y1 # y1 = a >> 39 # S0A - mov a, y3 # y3 = a # MAJA - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 - rorx $28, a, T1 # T1 = (a >> 28) # S0 - add frame_XFER(%rsp), h # h = k + w + h # -- - or c, y3 # y3 = a|c # MAJA - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 - mov a, T1 # T1 = a # MAJB - and b, y3 # y3 = (a|c)&b # MAJA - and c, T1 # T1 = a&c # MAJB - add y0, y2 # y2 = S1 + CH # -- - - add h, d # d = k + w + h + d # -- - or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ - add y1, h # h = k + w + h + S0 # -- - - add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- - - RotateState - -################################### RND N + 1 ######################################### - - add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- - mov f, y2 # y2 = f # CH - rorx $41, e, y0 # y0 = e >> 41 # S1A - rorx $18, e, y1 # y1 = e >> 18 # S1B - xor g, y2 # y2 = f^g # CH - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 - rorx $14, e, y1 # y1 = (e >> 14) # S1 - and e, y2 # y2 = (f^g)&e # CH - add y3, old_h # h = t1 + S0 + MAJ # -- - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 - rorx $34, a, T1 # T1 = a >> 34 # S0B - xor g, y2 # y2 = CH = ((f^g)&e)^g # CH - rorx $39, a, y1 # y1 = a >> 39 # S0A - mov a, y3 # y3 = a # MAJA - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 - rorx $28, a, T1 # T1 = (a >> 28) # S0 - add 8*1+frame_XFER(%rsp), h # h = k + w + h # -- - or c, y3 # y3 = a|c # MAJA - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 - mov a, T1 # T1 = a # MAJB - and b, y3 # y3 = (a|c)&b # MAJA - and c, T1 # T1 = a&c # MAJB - add y0, y2 # y2 = S1 + CH # -- - - add h, d # d = k + w + h + d # -- - or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ - add y1, h # h = k + w + h + S0 # -- - - add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- - - RotateState - -################################### RND N + 2 ######################################### - - add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- - mov f, y2 # y2 = f # CH - rorx $41, e, y0 # y0 = e >> 41 # S1A - rorx $18, e, y1 # y1 = e >> 18 # S1B - xor g, y2 # y2 = f^g # CH - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 - rorx $14, e, y1 # y1 = (e >> 14) # S1 - and e, y2 # y2 = (f^g)&e # CH - add y3, old_h # h = t1 + S0 + MAJ # -- - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 - rorx $34, a, T1 # T1 = a >> 34 # S0B - xor g, y2 # y2 = CH = ((f^g)&e)^g # CH - rorx $39, a, y1 # y1 = a >> 39 # S0A - mov a, y3 # y3 = a # MAJA - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 - rorx $28, a, T1 # T1 = (a >> 28) # S0 - add 8*2+frame_XFER(%rsp), h # h = k + w + h # -- - or c, y3 # y3 = a|c # MAJA - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 - mov a, T1 # T1 = a # MAJB - and b, y3 # y3 = (a|c)&b # 
MAJA - and c, T1 # T1 = a&c # MAJB - add y0, y2 # y2 = S1 + CH # -- - - add h, d # d = k + w + h + d # -- - or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ - add y1, h # h = k + w + h + S0 # -- - - add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- - - RotateState - -################################### RND N + 3 ######################################### - - add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- - mov f, y2 # y2 = f # CH - rorx $41, e, y0 # y0 = e >> 41 # S1A - rorx $18, e, y1 # y1 = e >> 18 # S1B - xor g, y2 # y2 = f^g # CH - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 - rorx $14, e, y1 # y1 = (e >> 14) # S1 - and e, y2 # y2 = (f^g)&e # CH - add y3, old_h # h = t1 + S0 + MAJ # -- - - xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 - rorx $34, a, T1 # T1 = a >> 34 # S0B - xor g, y2 # y2 = CH = ((f^g)&e)^g # CH - rorx $39, a, y1 # y1 = a >> 39 # S0A - mov a, y3 # y3 = a # MAJA - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 - rorx $28, a, T1 # T1 = (a >> 28) # S0 - add 8*3+frame_XFER(%rsp), h # h = k + w + h # -- - or c, y3 # y3 = a|c # MAJA - - xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 - mov a, T1 # T1 = a # MAJB - and b, y3 # y3 = (a|c)&b # MAJA - and c, T1 # T1 = a&c # MAJB - add y0, y2 # y2 = S1 + CH # -- - - - add h, d # d = k + w + h + d # -- - or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ - add y1, h # h = k + w + h + S0 # -- - - add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- - - add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- - - add y3, h # h = t1 + S0 + MAJ # -- - - RotateState - -.endm - -######################################################################## -# void sha512_transform_rorx(const void* M, void* D, uint64_t L)# -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. 
-# L is the message length in SHA512 blocks -######################################################################## -ENTRY(sha512_transform_rorx) - # Allocate Stack Space - mov %rsp, %rax - sub $frame_size, %rsp - and $~(0x20 - 1), %rsp - mov %rax, frame_RSPSAVE(%rsp) - - # Save GPRs - mov %rbp, frame_GPRSAVE(%rsp) - mov %rbx, 8*1+frame_GPRSAVE(%rsp) - mov %r12, 8*2+frame_GPRSAVE(%rsp) - mov %r13, 8*3+frame_GPRSAVE(%rsp) - mov %r14, 8*4+frame_GPRSAVE(%rsp) - mov %r15, 8*5+frame_GPRSAVE(%rsp) - - shl $7, NUM_BLKS # convert to bytes - jz done_hash - add INP, NUM_BLKS # pointer to end of data - mov NUM_BLKS, frame_INPEND(%rsp) - - ## load initial digest - mov 8*0(CTX),a - mov 8*1(CTX),b - mov 8*2(CTX),c - mov 8*3(CTX),d - mov 8*4(CTX),e - mov 8*5(CTX),f - mov 8*6(CTX),g - mov 8*7(CTX),h - - vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK - -loop0: - lea K512(%rip), TBL - - ## byte swap first 16 dwords - COPY_YMM_AND_BSWAP Y_0, (INP), BYTE_FLIP_MASK - COPY_YMM_AND_BSWAP Y_1, 1*32(INP), BYTE_FLIP_MASK - COPY_YMM_AND_BSWAP Y_2, 2*32(INP), BYTE_FLIP_MASK - COPY_YMM_AND_BSWAP Y_3, 3*32(INP), BYTE_FLIP_MASK - - mov INP, frame_INP(%rsp) - - ## schedule 64 input dwords, by doing 12 rounds of 4 each - movq $4, frame_SRND(%rsp) - -.align 16 -loop1: - vpaddq (TBL), Y_0, XFER - vmovdqa XFER, frame_XFER(%rsp) - FOUR_ROUNDS_AND_SCHED - - vpaddq 1*32(TBL), Y_0, XFER - vmovdqa XFER, frame_XFER(%rsp) - FOUR_ROUNDS_AND_SCHED - - vpaddq 2*32(TBL), Y_0, XFER - vmovdqa XFER, frame_XFER(%rsp) - FOUR_ROUNDS_AND_SCHED - - vpaddq 3*32(TBL), Y_0, XFER - vmovdqa XFER, frame_XFER(%rsp) - add $(4*32), TBL - FOUR_ROUNDS_AND_SCHED - - subq $1, frame_SRND(%rsp) - jne loop1 - - movq $2, frame_SRND(%rsp) -loop2: - vpaddq (TBL), Y_0, XFER - vmovdqa XFER, frame_XFER(%rsp) - DO_4ROUNDS - vpaddq 1*32(TBL), Y_1, XFER - vmovdqa XFER, frame_XFER(%rsp) - add $(2*32), TBL - DO_4ROUNDS - - vmovdqa Y_2, Y_0 - vmovdqa Y_3, Y_1 - - subq $1, frame_SRND(%rsp) - jne loop2 - - addm 8*0(CTX),a - addm 8*1(CTX),b - addm 8*2(CTX),c - addm 8*3(CTX),d - addm 8*4(CTX),e - addm 8*5(CTX),f - addm 8*6(CTX),g - addm 8*7(CTX),h - - mov frame_INP(%rsp), INP - add $128, INP - cmp frame_INPEND(%rsp), INP - jne loop0 - -done_hash: - -# Restore GPRs - mov frame_GPRSAVE(%rsp) ,%rbp - mov 8*1+frame_GPRSAVE(%rsp) ,%rbx - mov 8*2+frame_GPRSAVE(%rsp) ,%r12 - mov 8*3+frame_GPRSAVE(%rsp) ,%r13 - mov 8*4+frame_GPRSAVE(%rsp) ,%r14 - mov 8*5+frame_GPRSAVE(%rsp) ,%r15 - - # Restore Stack Pointer - mov frame_RSPSAVE(%rsp), %rsp - ret -ENDPROC(sha512_transform_rorx) - -######################################################################## -### Binary Data - -.data - -.align 64 -# K[t] used in SHA512 hashing -K512: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 
0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 - -.align 32 - -# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. -PSHUFFLE_BYTE_FLIP_MASK: - .octa 0x08090a0b0c0d0e0f0001020304050607 - .octa 0x18191a1b1c1d1e1f1011121314151617 - -MASK_YMM_LO: - .octa 0x00000000000000000000000000000000 - .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -#endif diff --git a/pkg/crypto/sha512/sha512-ssse3-asm_linux_amd64.S b/pkg/crypto/sha512/sha512-ssse3-asm_linux_amd64.S deleted file mode 100644 index 57c06caea..000000000 --- a/pkg/crypto/sha512/sha512-ssse3-asm_linux_amd64.S +++ /dev/null @@ -1,686 +0,0 @@ -######################################################################## -# Implement fast SHA-512 with SSSE3 instructions. (x86_64) -# -# Copyright (C) 2013 Intel Corporation. -# -# Authors: -# James Guilford -# Kirk Yap -# David Cote -# Tim Chen -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# OpenIB.org BSD license below: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# - Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# -######################################################################## -# -# This code is described in an Intel White-Paper: -# "Fast SHA-512 Implementations on Intel Architecture Processors" -# -# To find it, surf to http://www.intel.com/p/en_US/embedded -# and search for that title. 
-# -######################################################################## -# Using this part of Minio codebase under the license -# Apache License Version 2.0 with modifications -## - -#ifdef HAS_SSE41 -#ifndef ENTRY -#define ENTRY(name) \ - .globl name ; \ - .align 4,0x90 ; \ - name: -#endif - -#ifndef END -#define END(name) \ - .size name, .-name -#endif - -#ifndef ENDPROC -#define ENDPROC(name) \ - .type name, @function ; \ - END(name) -#endif - -#define NUM_INVALID 100 - -#define TYPE_R32 0 -#define TYPE_R64 1 -#define TYPE_XMM 2 -#define TYPE_INVALID 100 - - .macro R32_NUM opd r32 - \opd = NUM_INVALID - .ifc \r32,%eax - \opd = 0 - .endif - .ifc \r32,%ecx - \opd = 1 - .endif - .ifc \r32,%edx - \opd = 2 - .endif - .ifc \r32,%ebx - \opd = 3 - .endif - .ifc \r32,%esp - \opd = 4 - .endif - .ifc \r32,%ebp - \opd = 5 - .endif - .ifc \r32,%esi - \opd = 6 - .endif - .ifc \r32,%edi - \opd = 7 - .endif -#ifdef X86_64 - .ifc \r32,%r8d - \opd = 8 - .endif - .ifc \r32,%r9d - \opd = 9 - .endif - .ifc \r32,%r10d - \opd = 10 - .endif - .ifc \r32,%r11d - \opd = 11 - .endif - .ifc \r32,%r12d - \opd = 12 - .endif - .ifc \r32,%r13d - \opd = 13 - .endif - .ifc \r32,%r14d - \opd = 14 - .endif - .ifc \r32,%r15d - \opd = 15 - .endif -#endif - .endm - - .macro R64_NUM opd r64 - \opd = NUM_INVALID -#ifdef X86_64 - .ifc \r64,%rax - \opd = 0 - .endif - .ifc \r64,%rcx - \opd = 1 - .endif - .ifc \r64,%rdx - \opd = 2 - .endif - .ifc \r64,%rbx - \opd = 3 - .endif - .ifc \r64,%rsp - \opd = 4 - .endif - .ifc \r64,%rbp - \opd = 5 - .endif - .ifc \r64,%rsi - \opd = 6 - .endif - .ifc \r64,%rdi - \opd = 7 - .endif - .ifc \r64,%r8 - \opd = 8 - .endif - .ifc \r64,%r9 - \opd = 9 - .endif - .ifc \r64,%r10 - \opd = 10 - .endif - .ifc \r64,%r11 - \opd = 11 - .endif - .ifc \r64,%r12 - \opd = 12 - .endif - .ifc \r64,%r13 - \opd = 13 - .endif - .ifc \r64,%r14 - \opd = 14 - .endif - .ifc \r64,%r15 - \opd = 15 - .endif -#endif - .endm - - .macro XMM_NUM opd xmm - \opd = NUM_INVALID - .ifc \xmm,%xmm0 - \opd = 0 - .endif - .ifc \xmm,%xmm1 - \opd = 1 - .endif - .ifc \xmm,%xmm2 - \opd = 2 - .endif - .ifc \xmm,%xmm3 - \opd = 3 - .endif - .ifc \xmm,%xmm4 - \opd = 4 - .endif - .ifc \xmm,%xmm5 - \opd = 5 - .endif - .ifc \xmm,%xmm6 - \opd = 6 - .endif - .ifc \xmm,%xmm7 - \opd = 7 - .endif - .ifc \xmm,%xmm8 - \opd = 8 - .endif - .ifc \xmm,%xmm9 - \opd = 9 - .endif - .ifc \xmm,%xmm10 - \opd = 10 - .endif - .ifc \xmm,%xmm11 - \opd = 11 - .endif - .ifc \xmm,%xmm12 - \opd = 12 - .endif - .ifc \xmm,%xmm13 - \opd = 13 - .endif - .ifc \xmm,%xmm14 - \opd = 14 - .endif - .ifc \xmm,%xmm15 - \opd = 15 - .endif - .endm - - .macro TYPE type reg - R32_NUM reg_type_r32 \reg - R64_NUM reg_type_r64 \reg - XMM_NUM reg_type_xmm \reg - .if reg_type_r64 <> NUM_INVALID - \type = TYPE_R64 - .elseif reg_type_r32 <> NUM_INVALID - \type = TYPE_R32 - .elseif reg_type_xmm <> NUM_INVALID - \type = TYPE_XMM - .else - \type = TYPE_INVALID - .endif - .endm - - .macro PFX_OPD_SIZE - .byte 0x66 - .endm - - .macro PFX_REX opd1 opd2 W=0 - .if ((\opd1 | \opd2) & 8) || \W - .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) - .endif - .endm - - .macro MODRM mod opd1 opd2 - .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) - .endm - - .macro PSHUFB_XMM xmm1 xmm2 - XMM_NUM pshufb_opd1 \xmm1 - XMM_NUM pshufb_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX pshufb_opd1 pshufb_opd2 - .byte 0x0f, 0x38, 0x00 - MODRM 0xc0 pshufb_opd1 pshufb_opd2 - .endm - - .macro PCLMULQDQ imm8 xmm1 xmm2 - XMM_NUM clmul_opd1 \xmm1 - XMM_NUM clmul_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX clmul_opd1 clmul_opd2 
- .byte 0x0f, 0x3a, 0x44 - MODRM 0xc0 clmul_opd1 clmul_opd2 - .byte \imm8 - .endm - - .macro PEXTRD imm8 xmm gpr - R32_NUM extrd_opd1 \gpr - XMM_NUM extrd_opd2 \xmm - PFX_OPD_SIZE - PFX_REX extrd_opd1 extrd_opd2 - .byte 0x0f, 0x3a, 0x16 - MODRM 0xc0 extrd_opd1 extrd_opd2 - .byte \imm8 - .endm - - .macro MOVQ_R64_XMM opd1 opd2 - TYPE movq_r64_xmm_opd1_type \opd1 - .if movq_r64_xmm_opd1_type == TYPE_XMM - XMM_NUM movq_r64_xmm_opd1 \opd1 - R64_NUM movq_r64_xmm_opd2 \opd2 - .else - R64_NUM movq_r64_xmm_opd1 \opd1 - XMM_NUM movq_r64_xmm_opd2 \opd2 - .endif - PFX_OPD_SIZE - PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 - .if movq_r64_xmm_opd1_type == TYPE_XMM - .byte 0x0f, 0x7e - .else - .byte 0x0f, 0x6e - .endif - MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 - .endm - -.text - -# Virtual Registers -# ARG1 -msg = %rdi -# ARG2 -digest = %rsi -# ARG3 -msglen = %rdx -T1 = %rcx -T2 = %r8 -a_64 = %r9 -b_64 = %r10 -c_64 = %r11 -d_64 = %r12 -e_64 = %r13 -f_64 = %r14 -g_64 = %r15 -h_64 = %rbx -tmp0 = %rax - -# Local variables (stack frame) - -W_SIZE = 80*8 -WK_SIZE = 2*8 -RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 5*8 - -frame_W = 0 -frame_WK = frame_W + W_SIZE -frame_RSPSAVE = frame_WK + WK_SIZE -frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE -frame_size = frame_GPRSAVE + GPRSAVE_SIZE - -# Useful QWORD "arrays" for simpler memory references -# MSG, DIGEST, K_t, W_t are arrays -# WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even - -# Input message (arg1) -#define MSG(i) 8*i(msg) - -# Output Digest (arg2) -#define DIGEST(i) 8*i(digest) - -# SHA Constants (static mem) -#define K_t(i) 8*i+K512(%rip) - -# Message Schedule (stack frame) -#define W_t(i) 8*i+frame_W(%rsp) - -# W[t]+K[t] (stack frame) -#define WK_2(i) 8*((i%2))+frame_WK(%rsp) - -.macro RotateState - # Rotate symbols a..h right - TMP = h_64 - h_64 = g_64 - g_64 = f_64 - f_64 = e_64 - e_64 = d_64 - d_64 = c_64 - c_64 = b_64 - b_64 = a_64 - a_64 = TMP -.endm - -.macro SHA512_Round rnd - - # Compute Round %%t - mov f_64, T1 # T1 = f - mov e_64, tmp0 # tmp = e - xor g_64, T1 # T1 = f ^ g - ror $23, tmp0 # 41 # tmp = e ror 23 - and e_64, T1 # T1 = (f ^ g) & e - xor e_64, tmp0 # tmp = (e ror 23) ^ e - xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g) - idx = \rnd - add WK_2(idx), T1 # W[t] + K[t] from message scheduler - ror $4, tmp0 # 18 # tmp = ((e ror 23) ^ e) ror 4 - xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e - mov a_64, T2 # T2 = a - add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h - ror $14, tmp0 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) - add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e) - mov a_64, tmp0 # tmp = a - xor c_64, T2 # T2 = a ^ c - and c_64, tmp0 # tmp = a & c - and b_64, T2 # T2 = (a ^ c) & b - xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) - mov a_64, tmp0 # tmp = a - ror $5, tmp0 # 39 # tmp = a ror 5 - xor a_64, tmp0 # tmp = (a ror 5) ^ a - add T1, d_64 # e(next_state) = d + T1 - ror $6, tmp0 # 34 # tmp = ((a ror 5) ^ a) ror 6 - xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a - lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c) - ror $28, tmp0 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) - add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) S0(a) - RotateState -.endm - -.macro SHA512_2Sched_2Round_sse rnd - - # Compute rounds t-2 and t-1 - # Compute message schedule QWORDS t and t+1 - - # Two rounds are computed based on the values for K[t-2]+W[t-2] and - # K[t-1]+W[t-1] which were previously stored at WK_2 by the message - # scheduler. 
- # The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)]. - # They are then added to their respective SHA512 constants at - # [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] - # For brievity, the comments following vectored instructions only refer to - # the first of a pair of QWORDS. - # Eg. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]} - # The computation of the message schedule and the rounds are tightly - # stitched to take advantage of instruction-level parallelism. - # For clarity, integer instructions (for the rounds calculation) are indented - # by one tab. Vectored instructions (for the message scheduler) are indented - # by two tabs. - - mov f_64, T1 - idx = \rnd -2 - movdqa W_t(idx), %xmm2 # XMM2 = W[t-2] - xor g_64, T1 - and e_64, T1 - movdqa %xmm2, %xmm0 # XMM0 = W[t-2] - xor g_64, T1 - idx = \rnd - add WK_2(idx), T1 - idx = \rnd - 15 - movdqu W_t(idx), %xmm5 # XMM5 = W[t-15] - mov e_64, tmp0 - ror $23, tmp0 # 41 - movdqa %xmm5, %xmm3 # XMM3 = W[t-15] - xor e_64, tmp0 - ror $4, tmp0 # 18 - psrlq $61-19, %xmm0 # XMM0 = W[t-2] >> 42 - xor e_64, tmp0 - ror $14, tmp0 # 14 - psrlq $(8-7), %xmm3 # XMM3 = W[t-15] >> 1 - add tmp0, T1 - add h_64, T1 - pxor %xmm2, %xmm0 # XMM0 = (W[t-2] >> 42) ^ W[t-2] - mov a_64, T2 - xor c_64, T2 - pxor %xmm5, %xmm3 # XMM3 = (W[t-15] >> 1) ^ W[t-15] - and b_64, T2 - mov a_64, tmp0 - psrlq $(19-6), %xmm0 # XMM0 = ((W[t-2]>>42)^W[t-2])>>13 - and c_64, tmp0 - xor tmp0, T2 - psrlq $(7-1), %xmm3 # XMM3 = ((W[t-15]>>1)^W[t-15])>>6 - mov a_64, tmp0 - ror $5, tmp0 # 39 - pxor %xmm2, %xmm0 # XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2] - xor a_64, tmp0 - ror $6, tmp0 # 34 - pxor %xmm5, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15] - xor a_64, tmp0 - ror $28, tmp0 # 28 - psrlq $6, %xmm0 # XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6 - add tmp0, T2 - add T1, d_64 - psrlq $1, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]>>1 - lea (T1, T2), h_64 - RotateState - movdqa %xmm2, %xmm1 # XMM1 = W[t-2] - mov f_64, T1 - xor g_64, T1 - movdqa %xmm5, %xmm4 # XMM4 = W[t-15] - and e_64, T1 - xor g_64, T1 - psllq $(64-19)-(64-61) , %xmm1 # XMM1 = W[t-2] << 42 - idx = \rnd + 1 - add WK_2(idx), T1 - mov e_64, tmp0 - psllq $(64-1)-(64-8), %xmm4 # XMM4 = W[t-15] << 7 - ror $23, tmp0 # 41 - xor e_64, tmp0 - pxor %xmm2, %xmm1 # XMM1 = (W[t-2] << 42)^W[t-2] - ror $4, tmp0 # 18 - xor e_64, tmp0 - pxor %xmm5, %xmm4 # XMM4 = (W[t-15]<<7)^W[t-15] - ror $14, tmp0 # 14 - add tmp0, T1 - psllq $(64-61), %xmm1 # XMM1 = ((W[t-2] << 42)^W[t-2])<<3 - add h_64, T1 - mov a_64, T2 - psllq $(64-8), %xmm4 # XMM4 = ((W[t-15]<<7)^W[t-15])<<56 - xor c_64, T2 - and b_64, T2 - pxor %xmm1, %xmm0 # XMM0 = s1(W[t-2]) - mov a_64, tmp0 - and c_64, tmp0 - idx = \rnd - 7 - movdqu W_t(idx), %xmm1 # XMM1 = W[t-7] - xor tmp0, T2 - pxor %xmm4, %xmm3 # XMM3 = s0(W[t-15]) - mov a_64, tmp0 - paddq %xmm3, %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) - ror $5, tmp0 # 39 - idx =\rnd-16 - paddq W_t(idx), %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16] - xor a_64, tmp0 - paddq %xmm1, %xmm0 # XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] - ror $6, tmp0 # 34 - movdqa %xmm0, W_t(\rnd) # Store scheduled qwords - xor a_64, tmp0 - paddq K_t(\rnd), %xmm0 # Compute W[t]+K[t] - ror $28, tmp0 # 28 - idx = \rnd - movdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds - add tmp0, T2 - add T1, d_64 - lea (T1, T2), h_64 - RotateState -.endm - -######################################################################## -# void sha512_transform_ssse3(const void* M, void* D, u64 L)# -# Purpose: 
Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks. -######################################################################## -ENTRY(sha512_transform_ssse3) - - cmp $0, msglen - je nowork - - # Allocate Stack Space - mov %rsp, %rax - sub $frame_size, %rsp - and $~(0x20 - 1), %rsp - mov %rax, frame_RSPSAVE(%rsp) - - # Save GPRs - mov %rbx, frame_GPRSAVE(%rsp) - mov %r12, frame_GPRSAVE +8*1(%rsp) - mov %r13, frame_GPRSAVE +8*2(%rsp) - mov %r14, frame_GPRSAVE +8*3(%rsp) - mov %r15, frame_GPRSAVE +8*4(%rsp) - -updateblock: - -# Load state variables - mov DIGEST(0), a_64 - mov DIGEST(1), b_64 - mov DIGEST(2), c_64 - mov DIGEST(3), d_64 - mov DIGEST(4), e_64 - mov DIGEST(5), f_64 - mov DIGEST(6), g_64 - mov DIGEST(7), h_64 - - t = 0 - .rept 80/2 + 1 - # (80 rounds) / (2 rounds/iteration) + (1 iteration) - # +1 iteration because the scheduler leads hashing by 1 iteration - .if t < 2 - # BSWAP 2 QWORDS - movdqa XMM_QWORD_BSWAP(%rip), %xmm1 - movdqu MSG(t), %xmm0 - pshufb %xmm1, %xmm0 # BSWAP - movdqa %xmm0, W_t(t) # Store Scheduled Pair - paddq K_t(t), %xmm0 # Compute W[t]+K[t] - movdqa %xmm0, WK_2(t) # Store into WK for rounds - .elseif t < 16 - # BSWAP 2 QWORDS# Compute 2 Rounds - movdqu MSG(t), %xmm0 - pshufb %xmm1, %xmm0 # BSWAP - SHA512_Round t-2 # Round t-2 - movdqa %xmm0, W_t(t) # Store Scheduled Pair - paddq K_t(t), %xmm0 # Compute W[t]+K[t] - SHA512_Round t-1 # Round t-1 - movdqa %xmm0, WK_2(t) # Store W[t]+K[t] into WK - .elseif t < 79 - # Schedule 2 QWORDS# Compute 2 Rounds - SHA512_2Sched_2Round_sse t - .else - # Compute 2 Rounds - SHA512_Round t-2 - SHA512_Round t-1 - .endif - t = t+2 - .endr - - # Update digest - add a_64, DIGEST(0) - add b_64, DIGEST(1) - add c_64, DIGEST(2) - add d_64, DIGEST(3) - add e_64, DIGEST(4) - add f_64, DIGEST(5) - add g_64, DIGEST(6) - add h_64, DIGEST(7) - - # Advance to next message block - add $16*8, msg - dec msglen - jnz updateblock - - # Restore GPRs - mov frame_GPRSAVE(%rsp), %rbx - mov frame_GPRSAVE +8*1(%rsp), %r12 - mov frame_GPRSAVE +8*2(%rsp), %r13 - mov frame_GPRSAVE +8*3(%rsp), %r14 - mov frame_GPRSAVE +8*4(%rsp), %r15 - - # Restore Stack Pointer - mov frame_RSPSAVE(%rsp), %rsp - -nowork: - ret -ENDPROC(sha512_transform_ssse3) - -######################################################################## -### Binary Data - -.data - -.align 16 - -# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. 
-XMM_QWORD_BSWAP: - .octa 0x08090a0b0c0d0e0f0001020304050607 - -# K[t] used in SHA512 hashing -K512: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -#endif diff --git a/pkg/crypto/sha512/sha512.go b/pkg/crypto/sha512/sha512.go deleted file mode 100644 index 8caf45c26..000000000 --- a/pkg/crypto/sha512/sha512.go +++ /dev/null @@ -1,41 +0,0 @@ -// +build freebsd darwin windows 386 arm !cgo - -/* - * Minio Cloud Storage, (C) 2014-2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package sha512 - -import ( - "hash" - - "crypto/sha512" -) - -// Size - The size of a SHA512 checksum in bytes. -const Size = 64 - -// BlockSize - The blocksize of SHA512 in bytes. -const BlockSize = 128 - -// New returns a new hash.Hash computing SHA512. -func New() hash.Hash { - return sha512.New() -} - -// Sum512 - single caller sha512 helper -func Sum512(data []byte) [Size]byte { - return sha512.Sum512(data) -} diff --git a/pkg/crypto/sha512/sha512_linux.go b/pkg/crypto/sha512/sha512_linux.go deleted file mode 100644 index d3a8877fe..000000000 --- a/pkg/crypto/sha512/sha512_linux.go +++ /dev/null @@ -1,166 +0,0 @@ -// +build linux,amd64,cgo - -// Copyright 2009 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file of -// Golang project: -// https://github.com/golang/go/blob/master/LICENSE - -// Using this part of Minio codebase under the license -// Apache License Version 2.0 with modifications - -// Package sha512 implements the SHA512 hash algorithms as defined -// in FIPS 180-2. -package sha512 - -import ( - "hash" - - "github.com/klauspost/cpuid" -) - -// Size - The size of a SHA512 checksum in bytes. -const Size = 64 - -// BlockSize - The blocksize of SHA512 in bytes. -const BlockSize = 128 - -const ( - chunk = 128 - init0 = 0x6a09e667f3bcc908 - init1 = 0xbb67ae8584caa73b - init2 = 0x3c6ef372fe94f82b - init3 = 0xa54ff53a5f1d36f1 - init4 = 0x510e527fade682d1 - init5 = 0x9b05688c2b3e6c1f - init6 = 0x1f83d9abfb41bd6b - init7 = 0x5be0cd19137e2179 -) - -// digest represents the partial evaluation of a checksum. -type digest struct { - h [8]uint64 - x [chunk]byte - nx int - len uint64 -} - -func block(dig *digest, p []byte) { - switch true { - case cpuid.CPU.AVX2(): - blockAVX2(dig, p) - case cpuid.CPU.AVX(): - blockAVX(dig, p) - case cpuid.CPU.SSSE3(): - blockSSE(dig, p) - default: - blockGeneric(dig, p) - } -} - -// Reset digest to its default value -func (d *digest) Reset() { - d.h[0] = init0 - d.h[1] = init1 - d.h[2] = init2 - d.h[3] = init3 - d.h[4] = init4 - d.h[5] = init5 - d.h[6] = init6 - d.h[7] = init7 - d.nx = 0 - d.len = 0 -} - -// New returns a new hash.Hash computing the SHA512 checksum. -func New() hash.Hash { - d := new(digest) - d.Reset() - return d -} - -// Sum512 - single caller sha512 helper -func Sum512(data []byte) [Size]byte { - var d digest - d.Reset() - d.Write(data) - return d.checkSum() -} - -// Return output array byte size -func (d *digest) Size() int { return Size } - -// Return blockSize -func (d *digest) BlockSize() int { return BlockSize } - -// Write blocks -func (d *digest) Write(p []byte) (nn int, err error) { - nn = len(p) - d.len += uint64(nn) - if d.nx > 0 { - n := copy(d.x[d.nx:], p) - d.nx += n - if d.nx == chunk { - block(d, d.x[:]) - d.nx = 0 - } - p = p[n:] - } - if len(p) >= chunk { - n := len(p) &^ (chunk - 1) - block(d, p[:n]) - p = p[n:] - } - if len(p) > 0 { - d.nx = copy(d.x[:], p) - } - return -} - -// Calculate sha512 -func (d *digest) Sum(in []byte) []byte { - // Make a copy of d0 so that caller can keep writing and summing. - d0 := *d - hash := d0.checkSum() - return append(in, hash[:]...) -} - -// internal checksum calculation, returns [Size]byte -func (d *digest) checkSum() [Size]byte { - // Padding. Add a 1 bit and 0 bits until 112 bytes mod 128. - len := d.len - var tmp [128]byte - tmp[0] = 0x80 - if len%128 < 112 { - d.Write(tmp[0 : 112-len%128]) - } else { - d.Write(tmp[0 : 128+112-len%128]) - } - - // Length in bits. 
- len <<= 3 - for i := uint(0); i < 16; i++ { - tmp[i] = byte(len >> (120 - 8*i)) - } - d.Write(tmp[0:16]) - - if d.nx != 0 { - panic("d.nx != 0") - } - - h := d.h[:] - - var digest [Size]byte - for i, s := range h { - digest[i*8] = byte(s >> 56) - digest[i*8+1] = byte(s >> 48) - digest[i*8+2] = byte(s >> 40) - digest[i*8+3] = byte(s >> 32) - digest[i*8+4] = byte(s >> 24) - digest[i*8+5] = byte(s >> 16) - digest[i*8+6] = byte(s >> 8) - digest[i*8+7] = byte(s) - } - - return digest -} diff --git a/pkg/crypto/sha512/sha512_test.go b/pkg/crypto/sha512/sha512_test.go deleted file mode 100644 index 3e1db6ebc..000000000 --- a/pkg/crypto/sha512/sha512_test.go +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2014-2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file of -// Golang project: -// https://github.com/golang/go/blob/master/LICENSE - -// Using this part of Minio codebase under the license -// Apache License Version 2.0 with modifications - -// SHA512 hash algorithm. See FIPS 180-2. - -package sha512 - -import ( - "fmt" - "io" - "testing" -) - -type sha512Test struct { - out string - in string -} - -var golden = []sha512Test{ - {"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", ""}, - {"1f40fc92da241694750979ee6cf582f2d5d7d28e18335de05abc54d0560e0f5302860c652bf08d560252aa5e74210546f369fbbbce8c12cfc7957b2652fe9a75", "a"}, - {"2d408a0717ec188158278a796c689044361dc6fdde28d6f04973b80896e1823975cdbf12eb63f9e0591328ee235d80e9b5bf1aa6a44f4617ff3caf6400eb172d", "ab"}, - {"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f", "abc"}, - {"d8022f2060ad6efd297ab73dcc5355c9b214054b0d1776a136a669d26a7d3b14f73aa0d0ebff19ee333368f0164b6419a96da49e3e481753e7e96b716bdccb6f", "abcd"}, - {"878ae65a92e86cac011a570d4c30a7eaec442b85ce8eca0c2952b5e3cc0628c2e79d889ad4d5c7c626986d452dd86374b6ffaa7cd8b67665bef2289a5c70b0a1", "abcde"}, - {"e32ef19623e8ed9d267f657a81944b3d07adbb768518068e88435745564e8d4150a0a703be2a7d88b61e3d390c2bb97e2d4c311fdc69d6b1267f05f59aa920e7", "abcdef"}, - {"d716a4188569b68ab1b6dfac178e570114cdf0ea3a1cc0e31486c3e41241bc6a76424e8c37ab26f096fc85ef9886c8cb634187f4fddff645fb099f1ff54c6b8c", "abcdefg"}, - {"a3a8c81bc97c2560010d7389bc88aac974a104e0e2381220c6e084c4dccd1d2d17d4f86db31c2a851dc80e6681d74733c55dcd03dd96f6062cdda12a291ae6ce", "abcdefgh"}, - {"f22d51d25292ca1d0f68f69aedc7897019308cc9db46efb75a03dd494fc7f126c010e8ade6a00a0c1a5f1b75d81e0ed5a93ce98dc9b833db7839247b1d9c24fe", "abcdefghi"}, - {"ef6b97321f34b1fea2169a7db9e1960b471aa13302a988087357c520be957ca119c3ba68e6b4982c019ec89de3865ccf6a3cda1fe11e59f98d99f1502c8b9745", "abcdefghij"}, - 
{"2210d99af9c8bdecda1b4beff822136753d8342505ddce37f1314e2cdbb488c6016bdaa9bd2ffa513dd5de2e4b50f031393d8ab61f773b0e0130d7381e0f8a1d", "Discard medicine more than two years old."}, - {"a687a8985b4d8d0a24f115fe272255c6afaf3909225838546159c1ed685c211a203796ae8ecc4c81a5b6315919b3a64f10713da07e341fcdbb08541bf03066ce", "He who has a shady past knows that nice guys finish last."}, - {"8ddb0392e818b7d585ab22769a50df660d9f6d559cca3afc5691b8ca91b8451374e42bcdabd64589ed7c91d85f626596228a5c8572677eb98bc6b624befb7af8", "I wouldn't marry him with a ten foot pole."}, - {"26ed8f6ca7f8d44b6a8a54ae39640fa8ad5c673f70ee9ce074ba4ef0d483eea00bab2f61d8695d6b34df9c6c48ae36246362200ed820448bdc03a720366a87c6", "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, - {"e5a14bf044be69615aade89afcf1ab0389d5fc302a884d403579d1386a2400c089b0dbb387ed0f463f9ee342f8244d5a38cfbc0e819da9529fbff78368c9a982", "The days of the digital watch are numbered. -Tom Stoppard"}, - {"420a1faa48919e14651bed45725abe0f7a58e0f099424c4e5a49194946e38b46c1f8034b18ef169b2e31050d1648e0b982386595f7df47da4b6fd18e55333015", "Nepal premier won't resign."}, - {"d926a863beadb20134db07683535c72007b0e695045876254f341ddcccde132a908c5af57baa6a6a9c63e6649bba0c213dc05fadcf9abccea09f23dcfb637fbe", "For every action there is an equal and opposite government program."}, - {"9a98dd9bb67d0da7bf83da5313dff4fd60a4bac0094f1b05633690ffa7f6d61de9a1d4f8617937d560833a9aaa9ccafe3fd24db418d0e728833545cadd3ad92d", "His money is twice tainted: 'taint yours and 'taint mine."}, - {"d7fde2d2351efade52f4211d3746a0780a26eec3df9b2ed575368a8a1c09ec452402293a8ea4eceb5a4f60064ea29b13cdd86918cd7a4faf366160b009804107", "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, - {"b0f35ffa2697359c33a56f5c0cf715c7aeed96da9905ca2698acadb08fbc9e669bf566b6bd5d61a3e86dc22999bcc9f2224e33d1d4f32a228cf9d0349e2db518", "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, - {"3d2e5f91778c9e66f7e061293aaa8a8fc742dd3b2e4f483772464b1144189b49273e610e5cccd7a81a19ca1fa70f16b10f1a100a4d8c1372336be8484c64b311", "size: a.out: bad magic"}, - {"b2f68ff58ac015efb1c94c908b0d8c2bf06f491e4de8e6302c49016f7f8a33eac3e959856c7fddbc464de618701338a4b46f76dbfaf9a1e5262b5f40639771c7", "The major problem is with sendmail. -Mark Horton"}, - {"d8c92db5fdf52cf8215e4df3b4909d29203ff4d00e9ad0b64a6a4e04dec5e74f62e7c35c7fb881bd5de95442123df8f57a489b0ae616bd326f84d10021121c57", "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, - {"19a9f8dc0a233e464e8566ad3ca9b91e459a7b8c4780985b015776e1bf239a19bc233d0556343e2b0a9bc220900b4ebf4f8bdf89ff8efeaf79602d6849e6f72e", "If the enemy is within range, then so are you."}, - {"00b4c41f307bde87301cdc5b5ab1ae9a592e8ecbb2021dd7bc4b34e2ace60741cc362560bec566ba35178595a91932b8d5357e2c9cec92d393b0fa7831852476", "It's well we cannot hear the screams/That we create in others' dreams."}, - {"91eccc3d5375fd026e4d6787874b1dce201cecd8a27dbded5065728cb2d09c58a3d467bb1faf353bf7ba567e005245d5321b55bc344f7c07b91cb6f26c959be7", "You remind me of a TV show, but that's all right: I watch it anyway."}, - {"fabbbe22180f1f137cfdc9556d2570e775d1ae02a597ded43a72a40f9b485d500043b7be128fb9fcd982b83159a0d99aa855a9e7cc4240c00dc01a9bdf8218d7", "C is as portable as Stonehedge!!"}, - {"2ecdec235c1fa4fc2a154d8fba1dddb8a72a1ad73838b51d792331d143f8b96a9f6fcb0f34d7caa351fe6d88771c4f105040e0392f06e0621689d33b2f3ba92e", "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. 
Huxley"}, - {"7ad681f6f96f82f7abfa7ecc0334e8fa16d3dc1cdc45b60b7af43fe4075d2357c0c1d60e98350f1afb1f2fe7a4d7cd2ad55b88e458e06b73c40b437331f5dab4", "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, - {"833f9248ab4a3b9e5131f745fda1ffd2dd435b30e965957e78291c7ab73605fd1912b0794e5c233ab0a12d205a39778d19b83515d6a47003f19cdee51d98c7e0", "How can you write a big system without C++? -Paul Glick"}, -} - -func TestGolden(t *testing.T) { - for i := 0; i < len(golden); i++ { - g := golden[i] - s := fmt.Sprintf("%x", Sum512([]byte(g.in))) - if s != g.out { - t.Fatalf("Sum512 function: sha512(%s) = %s want %s", g.in, s, g.out) - } - c := New() - for j := 0; j < 3; j++ { - if j < 2 { - io.WriteString(c, g.in) - } else { - io.WriteString(c, g.in[0:len(g.in)/2]) - c.Sum(nil) - io.WriteString(c, g.in[len(g.in)/2:]) - } - s := fmt.Sprintf("%x", c.Sum(nil)) - if s != g.out { - t.Fatalf("sha512[%d](%s) = %s want %s", j, g.in, s, g.out) - } - c.Reset() - } - } -} - -func TestSize(t *testing.T) { - c := New() - if got := c.Size(); got != Size { - t.Errorf("Size = %d; want %d", got, Size) - } -} - -func TestBlockSize(t *testing.T) { - c := New() - if got := c.BlockSize(); got != BlockSize { - t.Errorf("BlockSize = %d; want %d", got, BlockSize) - } -} - -var bench = New() -var buf = make([]byte, 1024*1024) - -func benchmarkSize(b *testing.B, size int) { - b.SetBytes(int64(size)) - sum := make([]byte, bench.Size()) - for i := 0; i < b.N; i++ { - bench.Reset() - bench.Write(buf[:size]) - bench.Sum(sum[:0]) - } -} - -func BenchmarkHash8Bytes(b *testing.B) { - benchmarkSize(b, 8) -} - -func BenchmarkHash1K(b *testing.B) { - benchmarkSize(b, 1024) -} - -func BenchmarkHash8K(b *testing.B) { - benchmarkSize(b, 8192) -} - -func BenchmarkHash1M(b *testing.B) { - benchmarkSize(b, 1024*1024) -} diff --git a/pkg/crypto/sha512/sha512block.go b/pkg/crypto/sha512/sha512block.go deleted file mode 100644 index f7af98e01..000000000 --- a/pkg/crypto/sha512/sha512block.go +++ /dev/null @@ -1,181 +0,0 @@ -// +build linux,amd64,cgo - -/* - * Minio Cloud Storage, (C) 2014-2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Software block transform are provided by The Go Authors: -// Copyright 2009 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file of -// Golang project: -// https://github.com/golang/go/blob/master/LICENSE - -package sha512 - -// #cgo CFLAGS: -DHAS_SSE41 -DHAS_AVX -DHAS_AVX2 -// #include -// void sha512_transform_ssse3 (const void* M, void* D, uint64_t L); -// void sha512_transform_avx (const void* M, void* D, uint64_t L); -// void sha512_transform_rorx (const void* M, void* D, uint64_t L); -import "C" -import ( - "unsafe" -) - -func blockSSE(dig *digest, p []byte) { - C.sha512_transform_ssse3(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) -} - -func blockAVX(dig *digest, p []byte) { - C.sha512_transform_avx(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) -} - -func blockAVX2(dig *digest, p []byte) { - C.sha512_transform_rorx(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) -} - -func blockGeneric(dig *digest, p []byte) { - var w [80]uint64 - h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] - for len(p) >= chunk { - for i := 0; i < 16; i++ { - j := i * 8 - w[i] = uint64(p[j])<<56 | uint64(p[j+1])<<48 | uint64(p[j+2])<<40 | uint64(p[j+3])<<32 | - uint64(p[j+4])<<24 | uint64(p[j+5])<<16 | uint64(p[j+6])<<8 | uint64(p[j+7]) - } - for i := 16; i < 80; i++ { - v1 := w[i-2] - t1 := (v1>>19 | v1<<(64-19)) ^ (v1>>61 | v1<<(64-61)) ^ (v1 >> 6) - v2 := w[i-15] - t2 := (v2>>1 | v2<<(64-1)) ^ (v2>>8 | v2<<(64-8)) ^ (v2 >> 7) - - w[i] = t1 + w[i-7] + t2 + w[i-16] - } - - a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7 - - for i := 0; i < 80; i++ { - t1 := h + ((e>>14 | e<<(64-14)) ^ (e>>18 | e<<(64-18)) ^ (e>>41 | e<<(64-41))) + ((e & f) ^ (^e & g)) + _K[i] + w[i] - - t2 := ((a>>28 | a<<(64-28)) ^ (a>>34 | a<<(64-34)) ^ (a>>39 | a<<(64-39))) + ((a & b) ^ (a & c) ^ (b & c)) - - h = g - g = f - f = e - e = d + t1 - d = c - c = b - b = a - a = t1 + t2 - } - - h0 += a - h1 += b - h2 += c - h3 += d - h4 += e - h5 += f - h6 += g - h7 += h - - p = p[chunk:] - } - - dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 -} - -var _K = []uint64{ - 0x428a2f98d728ae22, - 0x7137449123ef65cd, - 0xb5c0fbcfec4d3b2f, - 0xe9b5dba58189dbbc, - 0x3956c25bf348b538, - 0x59f111f1b605d019, - 0x923f82a4af194f9b, - 0xab1c5ed5da6d8118, - 0xd807aa98a3030242, - 0x12835b0145706fbe, - 0x243185be4ee4b28c, - 0x550c7dc3d5ffb4e2, - 0x72be5d74f27b896f, - 0x80deb1fe3b1696b1, - 0x9bdc06a725c71235, - 0xc19bf174cf692694, - 0xe49b69c19ef14ad2, - 0xefbe4786384f25e3, - 0x0fc19dc68b8cd5b5, - 0x240ca1cc77ac9c65, - 0x2de92c6f592b0275, - 0x4a7484aa6ea6e483, - 0x5cb0a9dcbd41fbd4, - 0x76f988da831153b5, - 0x983e5152ee66dfab, - 0xa831c66d2db43210, - 0xb00327c898fb213f, - 0xbf597fc7beef0ee4, - 0xc6e00bf33da88fc2, - 0xd5a79147930aa725, - 0x06ca6351e003826f, - 0x142929670a0e6e70, - 0x27b70a8546d22ffc, - 0x2e1b21385c26c926, - 0x4d2c6dfc5ac42aed, - 0x53380d139d95b3df, - 0x650a73548baf63de, - 0x766a0abb3c77b2a8, - 0x81c2c92e47edaee6, - 0x92722c851482353b, - 0xa2bfe8a14cf10364, - 0xa81a664bbc423001, - 0xc24b8b70d0f89791, - 0xc76c51a30654be30, - 0xd192e819d6ef5218, - 0xd69906245565a910, - 0xf40e35855771202a, - 0x106aa07032bbd1b8, - 0x19a4c116b8d2d0c8, - 0x1e376c085141ab53, - 0x2748774cdf8eeb99, - 0x34b0bcb5e19b48a8, - 0x391c0cb3c5c95a63, - 0x4ed8aa4ae3418acb, - 0x5b9cca4f7763e373, - 0x682e6ff3d6b2b8a3, - 0x748f82ee5defb2fc, - 0x78a5636f43172f60, - 0x84c87814a1f0ab72, - 
0x8cc702081a6439ec, - 0x90befffa23631e28, - 0xa4506cebde82bde9, - 0xbef9a3f7b2c67915, - 0xc67178f2e372532b, - 0xca273eceea26619c, - 0xd186b8c721c0c207, - 0xeada7dd6cde0eb1e, - 0xf57d4f7fee6ed178, - 0x06f067aa72176fba, - 0x0a637dc5a2c898a6, - 0x113f9804bef90dae, - 0x1b710b35131c471b, - 0x28db77f523047d84, - 0x32caab7b40c72493, - 0x3c9ebe0a15c9bebc, - 0x431d67c49c100d4c, - 0x4cc5d4becb3e42b6, - 0x597f299cfc657e2a, - 0x5fcb6fab3ad6faec, - 0x6c44198c4a475817, -} diff --git a/vendor/github.com/dchest/blake2b/README b/vendor/github.com/dchest/blake2b/README new file mode 100644 index 000000000..09f014361 --- /dev/null +++ b/vendor/github.com/dchest/blake2b/README @@ -0,0 +1,23 @@ +Go implementation of BLAKE2b collision-resistant cryptographic hash function +created by Jean-Philippe Aumasson, Samuel Neves, Zooko Wilcox-O'Hearn, and +Christian Winnerlein (https://blake2.net). + +INSTALLATION + + $ go get github.com/dchest/blake2b + + +DOCUMENTATION + + See http://godoc.org/github.com/dchest/blake2b + + +PUBLIC DOMAIN DEDICATION + +Written in 2012 by Dmitry Chestnykh. + +To the extent possible under law, the author have dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. +http://creativecommons.org/publicdomain/zero/1.0/ + diff --git a/vendor/github.com/dchest/blake2b/blake2b.go b/vendor/github.com/dchest/blake2b/blake2b.go new file mode 100644 index 000000000..f3eb38872 --- /dev/null +++ b/vendor/github.com/dchest/blake2b/blake2b.go @@ -0,0 +1,299 @@ +// Written in 2012 by Dmitry Chestnykh. +// +// To the extent possible under law, the author have dedicated all copyright +// and related and neighboring rights to this software to the public domain +// worldwide. This software is distributed without any warranty. +// http://creativecommons.org/publicdomain/zero/1.0/ + +// Package blake2b implements BLAKE2b cryptographic hash function. +package blake2b + +import ( + "encoding/binary" + "errors" + "hash" +) + +const ( + BlockSize = 128 // block size of algorithm + Size = 64 // maximum digest size + SaltSize = 16 // maximum salt size + PersonSize = 16 // maximum personalization string size + KeySize = 64 // maximum size of key +) + +type digest struct { + h [8]uint64 // current chain value + t [2]uint64 // message bytes counter + f [2]uint64 // finalization flags + x [BlockSize]byte // buffer for data not yet compressed + nx int // number of bytes in buffer + + ih [8]uint64 // initial chain value (after config) + paddedKey [BlockSize]byte // copy of key, padded with zeros + isKeyed bool // indicates whether hash was keyed + size uint8 // digest size in bytes + isLastNode bool // indicates processing of the last node in tree hashing +} + +// Initialization values. +var iv = [8]uint64{ + 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, + 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, 0x9b05688c2b3e6c1f, + 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, +} + +// Config is used to configure hash function parameters and keying. +// All parameters are optional. +type Config struct { + Size uint8 // digest size (if zero, default size of 64 bytes is used) + Key []byte // key for prefix-MAC + Salt []byte // salt (if < 16 bytes, padded with zeros) + Person []byte // personalization (if < 16 bytes, padded with zeros) + Tree *Tree // parameters for tree hashing +} + +// Tree represents parameters for tree hashing. 
+type Tree struct { + Fanout uint8 // fanout + MaxDepth uint8 // maximal depth + LeafSize uint32 // leaf maximal byte length (0 for unlimited) + NodeOffset uint64 // node offset (0 for first, leftmost or leaf) + NodeDepth uint8 // node depth (0 for leaves) + InnerHashSize uint8 // inner hash byte length + IsLastNode bool // indicates processing of the last node of layer +} + +var ( + defaultConfig = &Config{Size: Size} + config256 = &Config{Size: 32} +) + +func verifyConfig(c *Config) error { + if c.Size > Size { + return errors.New("digest size is too large") + } + if len(c.Key) > KeySize { + return errors.New("key is too large") + } + if len(c.Salt) > SaltSize { + // Smaller salt is okay: it will be padded with zeros. + return errors.New("salt is too large") + } + if len(c.Person) > PersonSize { + // Smaller personalization is okay: it will be padded with zeros. + return errors.New("personalization is too large") + } + if c.Tree != nil { + if c.Tree.Fanout == 1 { + return errors.New("fanout of 1 is not allowed in tree mode") + } + if c.Tree.MaxDepth < 2 { + return errors.New("incorrect tree depth") + } + if c.Tree.InnerHashSize < 1 || c.Tree.InnerHashSize > Size { + return errors.New("incorrect tree inner hash size") + } + } + return nil +} + +// New returns a new hash.Hash configured with the given Config. +// Config can be nil, in which case the default one is used, calculating 64-byte digest. +// Returns non-nil error if Config contains invalid parameters. +func New(c *Config) (hash.Hash, error) { + if c == nil { + c = defaultConfig + } else { + if c.Size == 0 { + // Set default size if it's zero. + c.Size = Size + } + if err := verifyConfig(c); err != nil { + return nil, err + } + } + d := new(digest) + d.initialize(c) + return d, nil +} + +// initialize initializes digest with the given +// config, which must be non-nil and verified. +func (d *digest) initialize(c *Config) { + // Create parameter block. + var p [BlockSize]byte + p[0] = c.Size + p[1] = uint8(len(c.Key)) + if c.Salt != nil { + copy(p[32:], c.Salt) + } + if c.Person != nil { + copy(p[48:], c.Person) + } + if c.Tree != nil { + p[2] = c.Tree.Fanout + p[3] = c.Tree.MaxDepth + binary.LittleEndian.PutUint32(p[4:], c.Tree.LeafSize) + binary.LittleEndian.PutUint64(p[8:], c.Tree.NodeOffset) + p[16] = c.Tree.NodeDepth + p[17] = c.Tree.InnerHashSize + } else { + p[2] = 1 + p[3] = 1 + } + // Initialize. + d.size = c.Size + for i := 0; i < 8; i++ { + d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(p[i*8:]) + } + if c.Tree != nil && c.Tree.IsLastNode { + d.isLastNode = true + } + // Process key. + if c.Key != nil { + copy(d.paddedKey[:], c.Key) + d.Write(d.paddedKey[:]) + d.isKeyed = true + } + // Save a copy of initialized state. + copy(d.ih[:], d.h[:]) +} + +// New512 returns a new hash.Hash computing the BLAKE2b 64-byte checksum. +func New512() hash.Hash { + d := new(digest) + d.initialize(defaultConfig) + return d +} + +// New256 returns a new hash.Hash computing the BLAKE2b 32-byte checksum. +func New256() hash.Hash { + d := new(digest) + d.initialize(config256) + return d +} + +// NewMAC returns a new hash.Hash computing BLAKE2b prefix- +// Message Authentication Code of the given size in bytes +// (up to 64) with the given key (up to 64 bytes in length). +func NewMAC(outBytes uint8, key []byte) hash.Hash { + d, err := New(&Config{Size: outBytes, Key: key}) + if err != nil { + panic(err.Error()) + } + return d +} + +// Reset resets the state of digest to the initial state +// after configuration and keying. 
+func (d *digest) Reset() { + copy(d.h[:], d.ih[:]) + d.t[0] = 0 + d.t[1] = 0 + d.f[0] = 0 + d.f[1] = 0 + d.nx = 0 + if d.isKeyed { + d.Write(d.paddedKey[:]) + } +} + +// Size returns the digest size in bytes. +func (d *digest) Size() int { return int(d.size) } + +// BlockSize returns the algorithm block size in bytes. +func (d *digest) BlockSize() int { return BlockSize } + +func (d *digest) Write(p []byte) (nn int, err error) { + nn = len(p) + left := BlockSize - d.nx + if len(p) > left { + // Process buffer. + copy(d.x[d.nx:], p[:left]) + p = p[left:] + blocks(d, d.x[:]) + d.nx = 0 + } + // Process full blocks except for the last one. + if len(p) > BlockSize { + n := len(p) &^ (BlockSize - 1) + if n == len(p) { + n -= BlockSize + } + blocks(d, p[:n]) + p = p[n:] + } + // Fill buffer. + d.nx += copy(d.x[d.nx:], p) + return +} + +// Sum returns the calculated checksum. +func (d0 *digest) Sum(in []byte) []byte { + // Make a copy of d0 so that caller can keep writing and summing. + d := *d0 + hash := d.checkSum() + return append(in, hash[:d.size]...) +} + +func (d *digest) checkSum() [Size]byte { + // Do not create unnecessary copies of the key. + if d.isKeyed { + for i := 0; i < len(d.paddedKey); i++ { + d.paddedKey[i] = 0 + } + } + + dec := BlockSize - uint64(d.nx) + if d.t[0] < dec { + d.t[1]-- + } + d.t[0] -= dec + + // Pad buffer with zeros. + for i := d.nx; i < len(d.x); i++ { + d.x[i] = 0 + } + // Set last block flag. + d.f[0] = 0xffffffffffffffff + if d.isLastNode { + d.f[1] = 0xffffffffffffffff + } + // Compress last block. + blocks(d, d.x[:]) + + var out [Size]byte + j := 0 + for _, s := range d.h[:(d.size-1)/8+1] { + out[j+0] = byte(s >> 0) + out[j+1] = byte(s >> 8) + out[j+2] = byte(s >> 16) + out[j+3] = byte(s >> 24) + out[j+4] = byte(s >> 32) + out[j+5] = byte(s >> 40) + out[j+6] = byte(s >> 48) + out[j+7] = byte(s >> 56) + j += 8 + } + return out +} + +// Sum512 returns a 64-byte BLAKE2b hash of data. +func Sum512(data []byte) [64]byte { + var d digest + d.initialize(defaultConfig) + d.Write(data) + return d.checkSum() +} + +// Sum256 returns a 32-byte BLAKE2b hash of data. +func Sum256(data []byte) (out [32]byte) { + var d digest + d.initialize(config256) + d.Write(data) + sum := d.checkSum() + copy(out[:], sum[:32]) + return +} diff --git a/vendor/github.com/dchest/blake2b/block.go b/vendor/github.com/dchest/blake2b/block.go new file mode 100644 index 000000000..2f04bb78b --- /dev/null +++ b/vendor/github.com/dchest/blake2b/block.go @@ -0,0 +1,1420 @@ +// Written in 2012 by Dmitry Chestnykh. +// +// To the extent possible under law, the author have dedicated all copyright +// and related and neighboring rights to this software to the public domain +// worldwide. This software is distributed without any warranty. +// http://creativecommons.org/publicdomain/zero/1.0/ + +// BLAKE2b compression of message blocks. + +package blake2b + +func blocks(d *digest, p []uint8) { + h0, h1, h2, h3, h4, h5, h6, h7 := d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] + + for len(p) >= BlockSize { + // Increment counter. + d.t[0] += BlockSize + if d.t[0] < BlockSize { + d.t[1]++ + } + // Initialize compression function. 
+ v0, v1, v2, v3, v4, v5, v6, v7 := h0, h1, h2, h3, h4, h5, h6, h7 + v8 := iv[0] + v9 := iv[1] + v10 := iv[2] + v11 := iv[3] + v12 := iv[4] ^ d.t[0] + v13 := iv[5] ^ d.t[1] + v14 := iv[6] ^ d.f[0] + v15 := iv[7] ^ d.f[1] + var m [16]uint64 + + j := 0 + for i := 0; i < 16; i++ { + m[i] = uint64(p[j]) | uint64(p[j+1])<<8 | uint64(p[j+2])<<16 | uint64(p[j+3])<<24 | + uint64(p[j+4])<<32 | uint64(p[j+5])<<40 | uint64(p[j+6])<<48 | uint64(p[j+7])<<56 + j += 8 + } + + // Round 1. + v0 += m[0] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[2] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[4] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[6] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[5] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[7] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[3] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[1] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[8] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[10] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[12] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[14] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[13] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[15] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[11] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[9] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 2. 
+ v0 += m[14] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[4] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[9] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[13] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[15] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[6] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[8] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[10] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[1] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[0] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[11] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[5] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[7] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[3] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[2] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[12] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 3. 
+ v0 += m[11] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[12] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[5] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[15] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[2] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[13] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[0] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[8] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[10] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[3] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[7] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[9] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[1] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[4] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[6] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[14] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 4. 
+ v0 += m[7] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[3] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[13] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[11] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[12] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[14] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[1] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[9] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[2] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[5] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[4] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[15] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[0] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[8] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[10] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[6] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 5. 
+ v0 += m[9] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[5] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[2] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[10] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[4] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[15] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[7] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[0] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[14] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[11] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[6] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[3] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[8] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[13] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[12] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[1] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 6. 
+ v0 += m[2] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[6] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[0] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[8] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[11] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[3] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[10] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[12] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[4] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[7] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[15] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[1] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[14] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[9] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[5] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[13] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 7. 
+ v0 += m[12] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-32) | v12>>32 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-24) | v4>>24 + v1 += m[1] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-32) | v13>>32 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-24) | v5>>24 + v2 += m[14] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-32) | v14>>32 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-24) | v6>>24 + v3 += m[4] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-32) | v15>>32 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-24) | v7>>24 + v2 += m[13] + v2 += v6 + v14 ^= v2 + v14 = v14<<(64-16) | v14>>16 + v10 += v14 + v6 ^= v10 + v6 = v6<<(64-63) | v6>>63 + v3 += m[10] + v3 += v7 + v15 ^= v3 + v15 = v15<<(64-16) | v15>>16 + v11 += v15 + v7 ^= v11 + v7 = v7<<(64-63) | v7>>63 + v1 += m[15] + v1 += v5 + v13 ^= v1 + v13 = v13<<(64-16) | v13>>16 + v9 += v13 + v5 ^= v9 + v5 = v5<<(64-63) | v5>>63 + v0 += m[5] + v0 += v4 + v12 ^= v0 + v12 = v12<<(64-16) | v12>>16 + v8 += v12 + v4 ^= v8 + v4 = v4<<(64-63) | v4>>63 + v0 += m[0] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-32) | v15>>32 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-24) | v5>>24 + v1 += m[6] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-32) | v12>>32 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-24) | v6>>24 + v2 += m[9] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-32) | v13>>32 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-24) | v7>>24 + v3 += m[8] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-32) | v14>>32 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-24) | v4>>24 + v2 += m[2] + v2 += v7 + v13 ^= v2 + v13 = v13<<(64-16) | v13>>16 + v8 += v13 + v7 ^= v8 + v7 = v7<<(64-63) | v7>>63 + v3 += m[11] + v3 += v4 + v14 ^= v3 + v14 = v14<<(64-16) | v14>>16 + v9 += v14 + v4 ^= v9 + v4 = v4<<(64-63) | v4>>63 + v1 += m[3] + v1 += v6 + v12 ^= v1 + v12 = v12<<(64-16) | v12>>16 + v11 += v12 + v6 ^= v11 + v6 = v6<<(64-63) | v6>>63 + v0 += m[7] + v0 += v5 + v15 ^= v0 + v15 = v15<<(64-16) | v15>>16 + v10 += v15 + v5 ^= v10 + v5 = v5<<(64-63) | v5>>63 + + // Round 8. 
+		v0 += m[13]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-32) | v12>>32
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-24) | v4>>24
+		v1 += m[7]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-32) | v13>>32
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-24) | v5>>24
+		v2 += m[12]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-32) | v14>>32
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-24) | v6>>24
+		v3 += m[3]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-32) | v15>>32
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-24) | v7>>24
+		v2 += m[1]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-16) | v14>>16
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-63) | v6>>63
+		v3 += m[9]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-16) | v15>>16
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-63) | v7>>63
+		v1 += m[14]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-16) | v13>>16
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-63) | v5>>63
+		v0 += m[11]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-16) | v12>>16
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-63) | v4>>63
+		v0 += m[5]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-32) | v15>>32
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-24) | v5>>24
+		v1 += m[15]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-32) | v12>>32
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-24) | v6>>24
+		v2 += m[8]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-32) | v13>>32
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-24) | v7>>24
+		v3 += m[2]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-32) | v14>>32
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-24) | v4>>24
+		v2 += m[6]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-16) | v13>>16
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-63) | v7>>63
+		v3 += m[10]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-16) | v14>>16
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-63) | v4>>63
+		v1 += m[4]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-16) | v12>>16
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-63) | v6>>63
+		v0 += m[0]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-16) | v15>>16
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-63) | v5>>63
+
+		// Round 9.
+		v0 += m[6]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-32) | v12>>32
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-24) | v4>>24
+		v1 += m[14]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-32) | v13>>32
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-24) | v5>>24
+		v2 += m[11]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-32) | v14>>32
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-24) | v6>>24
+		v3 += m[0]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-32) | v15>>32
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-24) | v7>>24
+		v2 += m[3]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-16) | v14>>16
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-63) | v6>>63
+		v3 += m[8]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-16) | v15>>16
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-63) | v7>>63
+		v1 += m[9]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-16) | v13>>16
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-63) | v5>>63
+		v0 += m[15]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-16) | v12>>16
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-63) | v4>>63
+		v0 += m[12]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-32) | v15>>32
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-24) | v5>>24
+		v1 += m[13]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-32) | v12>>32
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-24) | v6>>24
+		v2 += m[1]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-32) | v13>>32
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-24) | v7>>24
+		v3 += m[10]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-32) | v14>>32
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-24) | v4>>24
+		v2 += m[4]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-16) | v13>>16
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-63) | v7>>63
+		v3 += m[5]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-16) | v14>>16
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-63) | v4>>63
+		v1 += m[7]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-16) | v12>>16
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-63) | v6>>63
+		v0 += m[2]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-16) | v15>>16
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-63) | v5>>63
+
+		// Round 10.
+		v0 += m[10]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-32) | v12>>32
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-24) | v4>>24
+		v1 += m[8]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-32) | v13>>32
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-24) | v5>>24
+		v2 += m[7]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-32) | v14>>32
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-24) | v6>>24
+		v3 += m[1]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-32) | v15>>32
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-24) | v7>>24
+		v2 += m[6]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-16) | v14>>16
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-63) | v6>>63
+		v3 += m[5]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-16) | v15>>16
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-63) | v7>>63
+		v1 += m[4]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-16) | v13>>16
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-63) | v5>>63
+		v0 += m[2]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-16) | v12>>16
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-63) | v4>>63
+		v0 += m[15]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-32) | v15>>32
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-24) | v5>>24
+		v1 += m[9]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-32) | v12>>32
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-24) | v6>>24
+		v2 += m[3]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-32) | v13>>32
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-24) | v7>>24
+		v3 += m[13]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-32) | v14>>32
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-24) | v4>>24
+		v2 += m[12]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-16) | v13>>16
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-63) | v7>>63
+		v3 += m[0]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-16) | v14>>16
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-63) | v4>>63
+		v1 += m[14]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-16) | v12>>16
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-63) | v6>>63
+		v0 += m[11]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-16) | v15>>16
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-63) | v5>>63
+
+		// Round 11.
+		v0 += m[0]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-32) | v12>>32
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-24) | v4>>24
+		v1 += m[2]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-32) | v13>>32
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-24) | v5>>24
+		v2 += m[4]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-32) | v14>>32
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-24) | v6>>24
+		v3 += m[6]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-32) | v15>>32
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-24) | v7>>24
+		v2 += m[5]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-16) | v14>>16
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-63) | v6>>63
+		v3 += m[7]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-16) | v15>>16
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-63) | v7>>63
+		v1 += m[3]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-16) | v13>>16
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-63) | v5>>63
+		v0 += m[1]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-16) | v12>>16
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-63) | v4>>63
+		v0 += m[8]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-32) | v15>>32
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-24) | v5>>24
+		v1 += m[10]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-32) | v12>>32
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-24) | v6>>24
+		v2 += m[12]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-32) | v13>>32
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-24) | v7>>24
+		v3 += m[14]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-32) | v14>>32
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-24) | v4>>24
+		v2 += m[13]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-16) | v13>>16
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-63) | v7>>63
+		v3 += m[15]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-16) | v14>>16
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-63) | v4>>63
+		v1 += m[11]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-16) | v12>>16
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-63) | v6>>63
+		v0 += m[9]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-16) | v15>>16
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-63) | v5>>63
+
+		// Round 12.
+		v0 += m[14]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-32) | v12>>32
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-24) | v4>>24
+		v1 += m[4]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-32) | v13>>32
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-24) | v5>>24
+		v2 += m[9]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-32) | v14>>32
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-24) | v6>>24
+		v3 += m[13]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-32) | v15>>32
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-24) | v7>>24
+		v2 += m[15]
+		v2 += v6
+		v14 ^= v2
+		v14 = v14<<(64-16) | v14>>16
+		v10 += v14
+		v6 ^= v10
+		v6 = v6<<(64-63) | v6>>63
+		v3 += m[6]
+		v3 += v7
+		v15 ^= v3
+		v15 = v15<<(64-16) | v15>>16
+		v11 += v15
+		v7 ^= v11
+		v7 = v7<<(64-63) | v7>>63
+		v1 += m[8]
+		v1 += v5
+		v13 ^= v1
+		v13 = v13<<(64-16) | v13>>16
+		v9 += v13
+		v5 ^= v9
+		v5 = v5<<(64-63) | v5>>63
+		v0 += m[10]
+		v0 += v4
+		v12 ^= v0
+		v12 = v12<<(64-16) | v12>>16
+		v8 += v12
+		v4 ^= v8
+		v4 = v4<<(64-63) | v4>>63
+		v0 += m[1]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-32) | v15>>32
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-24) | v5>>24
+		v1 += m[0]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-32) | v12>>32
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-24) | v6>>24
+		v2 += m[11]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-32) | v13>>32
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-24) | v7>>24
+		v3 += m[5]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-32) | v14>>32
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-24) | v4>>24
+		v2 += m[7]
+		v2 += v7
+		v13 ^= v2
+		v13 = v13<<(64-16) | v13>>16
+		v8 += v13
+		v7 ^= v8
+		v7 = v7<<(64-63) | v7>>63
+		v3 += m[3]
+		v3 += v4
+		v14 ^= v3
+		v14 = v14<<(64-16) | v14>>16
+		v9 += v14
+		v4 ^= v9
+		v4 = v4<<(64-63) | v4>>63
+		v1 += m[2]
+		v1 += v6
+		v12 ^= v1
+		v12 = v12<<(64-16) | v12>>16
+		v11 += v12
+		v6 ^= v11
+		v6 = v6<<(64-63) | v6>>63
+		v0 += m[12]
+		v0 += v5
+		v15 ^= v0
+		v15 = v15<<(64-16) | v15>>16
+		v10 += v15
+		v5 ^= v10
+		v5 = v5<<(64-63) | v5>>63
+
+		h0 ^= v0 ^ v8
+		h1 ^= v1 ^ v9
+		h2 ^= v2 ^ v10
+		h3 ^= v3 ^ v11
+		h4 ^= v4 ^ v12
+		h5 ^= v5 ^ v13
+		h6 ^= v6 ^ v14
+		h7 ^= v7 ^ v15
+
+		p = p[BlockSize:]
+	}
+	d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = h0, h1, h2, h3, h4, h5, h6, h7
+}
diff --git a/vendor/vendor.json b/vendor/vendor.json
index f86a4d520..e139499e5 100644
--- a/vendor/vendor.json
+++ b/vendor/vendor.json
@@ -12,6 +12,11 @@
 			"revision": "c1f48d5ce4f292dfb775ef52aaedd15be323510d",
 			"revisionTime": "2016-05-20T13:10:51+03:00"
 		},
+		{
+			"path": "github.com/dchest/blake2b",
+			"revision": "3c8c640cd7bea3ca78209d812b5854442ab92fed",
+			"revisionTime": "2015-10-22T12:35:02+02:00"
+		},
 		{
 			"path": "github.com/dgrijalva/jwt-go",
 			"revision": "afef698c326bfd906b11659432544e5aae441d44",
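
Reviewer note (not part of the patch): the unrolled rounds in the vendored block.go above are all instances of the BLAKE2b G mixing function; each group of seven statements is one half of a G call, using rotations 32/24 for the first half and 16/63 for the second, with the halves interleaved across columns and diagonals for speed. The following standalone sketch is purely illustrative -- the function name g and the demo values are chosen here, not taken from the vendored code -- and shows the compact form that those groups expand.

package main

import "fmt"

// g mirrors the arithmetic of the BLAKE2b mixing function G over a 16-word
// working state: add a message word, add, xor, rotate right by 32/24 in the
// first half and by 16/63 in the second.
func g(v *[16]uint64, a, b, c, d int, mx, my uint64) {
	rotr := func(x uint64, n uint) uint64 { return x<<(64-n) | x>>n }

	v[a] += mx
	v[a] += v[b]
	v[d] ^= v[a]
	v[d] = rotr(v[d], 32)
	v[c] += v[d]
	v[b] ^= v[c]
	v[b] = rotr(v[b], 24)

	v[a] += my
	v[a] += v[b]
	v[d] ^= v[a]
	v[d] = rotr(v[d], 16)
	v[c] += v[d]
	v[b] ^= v[c]
	v[b] = rotr(v[b], 63)
}

func main() {
	var v [16]uint64
	for i := range v {
		v[i] = uint64(i+1) * 0x9e3779b97f4a7c15 // arbitrary demo state
	}
	g(&v, 0, 4, 8, 12, 1, 2)  // one column mix, arbitrary message words
	g(&v, 0, 5, 10, 15, 3, 4) // one diagonal mix
	fmt.Printf("v0 = %#016x\n", v[0])
}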
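
For reference, a minimal, self-contained usage sketch of the newly vendored github.com/dchest/blake2b package (the sample input below is arbitrary), showing how a hex-encoded blake2b checksum of the kind recorded per part is produced:

package main

import (
	"encoding/hex"
	"fmt"

	"github.com/dchest/blake2b"
)

func main() {
	// New512 returns a standard hash.Hash with a 64-byte digest, so the
	// hex-encoded checksum is 128 characters wide -- the same width as
	// the sha512 values it replaces.
	h := blake2b.New512()
	h.Write([]byte("example erasure-coded block")) // arbitrary sample data
	fmt.Println(hex.EncodeToString(h.Sum(nil)))
}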