From b21c0995226b971b47aa05aa94bc0063a264dfe8 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Fri, 6 Dec 2024 18:23:10 +0000
Subject: [PATCH] Vendor

---
 .../ai3/go-common/pwhash/argon2.go            |  221 +
 .../ai3/go-common/pwhash/crypt.go             |   39 +
 .../ai3/go-common/pwhash/password.go          |   82 +
 .../ai3/go-common/pwhash/scrypt.go            |  116 +
 vendor/github.com/amoghe/go-crypt/.gitignore  |   24 +
 vendor/github.com/amoghe/go-crypt/.travis.yml |    5 +
 vendor/github.com/amoghe/go-crypt/LICENSE     |   22 +
 vendor/github.com/amoghe/go-crypt/README.md   |  108 +
 vendor/github.com/amoghe/go-crypt/crypt.go    |   59 +
 vendor/github.com/amoghe/go-crypt/crypt_r.go  |   78 +
 .../amoghe/go-crypt/crypt_unsupported.go      |   17 +
 vendor/github.com/amoghe/go-crypt/version.go  |   25 +
 vendor/github.com/coreos/go-systemd/LICENSE   |  191 +
 vendor/github.com/coreos/go-systemd/NOTICE    |    5 +
 .../coreos/go-systemd/activation/files.go     |   67 +
 .../coreos/go-systemd/activation/listeners.go |  103 +
 .../go-systemd/activation/packetconns.go      |   38 +
 vendor/golang.org/x/crypto/LICENSE            |   27 +
 vendor/golang.org/x/crypto/PATENTS            |   22 +
 vendor/golang.org/x/crypto/argon2/argon2.go   |  283 +
 vendor/golang.org/x/crypto/argon2/blake2b.go  |   53 +
 .../x/crypto/argon2/blamka_amd64.go           |   60 +
 .../golang.org/x/crypto/argon2/blamka_amd64.s | 2791 ++++++++++
 .../x/crypto/argon2/blamka_generic.go         |  163 +
 .../golang.org/x/crypto/argon2/blamka_ref.go  |   15 +
 vendor/golang.org/x/crypto/blake2b/blake2b.go |  291 ++
 .../x/crypto/blake2b/blake2bAVX2_amd64.go     |   37 +
 .../x/crypto/blake2b/blake2bAVX2_amd64.s      | 4559 +++++++++++++++++
 .../x/crypto/blake2b/blake2b_amd64.s          | 1441 ++++++
 .../x/crypto/blake2b/blake2b_generic.go       |  182 +
 .../x/crypto/blake2b/blake2b_ref.go           |   11 +
 vendor/golang.org/x/crypto/blake2b/blake2x.go |  177 +
 .../golang.org/x/crypto/blake2b/register.go   |   30 +
 vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go   |   77 +
 vendor/golang.org/x/crypto/scrypt/scrypt.go   |  212 +
 vendor/golang.org/x/sys/LICENSE               |   27 +
 vendor/golang.org/x/sys/PATENTS               |   22 +
 vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s   |   17 +
 vendor/golang.org/x/sys/cpu/byteorder.go      |   66 +
 vendor/golang.org/x/sys/cpu/cpu.go            |  312 ++
 vendor/golang.org/x/sys/cpu/cpu_aix.go        |   33 +
 vendor/golang.org/x/sys/cpu/cpu_arm.go        |   73 +
 vendor/golang.org/x/sys/cpu/cpu_arm64.go      |  194 +
 vendor/golang.org/x/sys/cpu/cpu_arm64.s       |   39 +
 vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go   |   12 +
 vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go   |   21 +
 vendor/golang.org/x/sys/cpu/cpu_gc_x86.go     |   15 +
 .../golang.org/x/sys/cpu/cpu_gccgo_arm64.go   |   11 +
 .../golang.org/x/sys/cpu/cpu_gccgo_s390x.go   |   22 +
 vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c   |   37 +
 vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go  |   31 +
 vendor/golang.org/x/sys/cpu/cpu_linux.go      |   15 +
 vendor/golang.org/x/sys/cpu/cpu_linux_arm.go  |   39 +
 .../golang.org/x/sys/cpu/cpu_linux_arm64.go   |  121 +
 .../golang.org/x/sys/cpu/cpu_linux_mips64x.go |   22 +
 .../golang.org/x/sys/cpu/cpu_linux_noinit.go  |    9 +
 .../golang.org/x/sys/cpu/cpu_linux_ppc64x.go  |   30 +
 .../golang.org/x/sys/cpu/cpu_linux_riscv64.go |  137 +
 .../golang.org/x/sys/cpu/cpu_linux_s390x.go   |   40 +
 vendor/golang.org/x/sys/cpu/cpu_loong64.go    |   12 +
 vendor/golang.org/x/sys/cpu/cpu_mips64x.go    |   15 +
 vendor/golang.org/x/sys/cpu/cpu_mipsx.go      |   11 +
 .../golang.org/x/sys/cpu/cpu_netbsd_arm64.go  |  173 +
 .../golang.org/x/sys/cpu/cpu_openbsd_arm64.go |   65 +
 .../golang.org/x/sys/cpu/cpu_openbsd_arm64.s  |   11 +
 vendor/golang.org/x/sys/cpu/cpu_other_arm.go  |    9 +
 .../golang.org/x/sys/cpu/cpu_other_arm64.go   |    9 +
 .../golang.org/x/sys/cpu/cpu_other_mips64x.go |   11 +
 .../golang.org/x/sys/cpu/cpu_other_ppc64x.go  |   12 +
 .../golang.org/x/sys/cpu/cpu_other_riscv64.go |   11 +
 vendor/golang.org/x/sys/cpu/cpu_ppc64x.go     |   16 +
 vendor/golang.org/x/sys/cpu/cpu_riscv64.go    |   20 +
 vendor/golang.org/x/sys/cpu/cpu_s390x.go      |  172 +
 vendor/golang.org/x/sys/cpu/cpu_s390x.s       |   57 +
 vendor/golang.org/x/sys/cpu/cpu_wasm.go       |   17 +
 vendor/golang.org/x/sys/cpu/cpu_x86.go        |  151 +
 vendor/golang.org/x/sys/cpu/cpu_x86.s         |   26 +
 vendor/golang.org/x/sys/cpu/cpu_zos.go        |   10 +
 vendor/golang.org/x/sys/cpu/cpu_zos_s390x.go  |   25 +
 vendor/golang.org/x/sys/cpu/endian_big.go     |   10 +
 vendor/golang.org/x/sys/cpu/endian_little.go  |   10 +
 vendor/golang.org/x/sys/cpu/hwcap_linux.go    |   71 +
 vendor/golang.org/x/sys/cpu/parse.go          |   43 +
 .../x/sys/cpu/proc_cpuinfo_linux.go           |   53 +
 vendor/golang.org/x/sys/cpu/runtime_auxv.go   |   16 +
 .../x/sys/cpu/runtime_auxv_go121.go           |   18 +
 .../golang.org/x/sys/cpu/syscall_aix_gccgo.go |   26 +
 .../x/sys/cpu/syscall_aix_ppc64_gc.go         |   35 +
 vendor/modules.txt                            |   18 +
 89 files changed, 14109 insertions(+)
 create mode 100644 vendor/git.autistici.org/ai3/go-common/pwhash/argon2.go
 create mode 100644 vendor/git.autistici.org/ai3/go-common/pwhash/crypt.go
 create mode 100644 vendor/git.autistici.org/ai3/go-common/pwhash/password.go
 create mode 100644 vendor/git.autistici.org/ai3/go-common/pwhash/scrypt.go
 create mode 100644 vendor/github.com/amoghe/go-crypt/.gitignore
 create mode 100644 vendor/github.com/amoghe/go-crypt/.travis.yml
 create mode 100644 vendor/github.com/amoghe/go-crypt/LICENSE
 create mode 100644 vendor/github.com/amoghe/go-crypt/README.md
 create mode 100644 vendor/github.com/amoghe/go-crypt/crypt.go
 create mode 100644 vendor/github.com/amoghe/go-crypt/crypt_r.go
 create mode 100644 vendor/github.com/amoghe/go-crypt/crypt_unsupported.go
 create mode 100644 vendor/github.com/amoghe/go-crypt/version.go
 create mode 100644 vendor/github.com/coreos/go-systemd/LICENSE
 create mode 100644 vendor/github.com/coreos/go-systemd/NOTICE
 create mode 100644 vendor/github.com/coreos/go-systemd/activation/files.go
 create mode 100644 vendor/github.com/coreos/go-systemd/activation/listeners.go
 create mode 100644 vendor/github.com/coreos/go-systemd/activation/packetconns.go
 create mode 100644 vendor/golang.org/x/crypto/LICENSE
 create mode 100644 vendor/golang.org/x/crypto/PATENTS
 create mode 100644 vendor/golang.org/x/crypto/argon2/argon2.go
 create mode 100644 vendor/golang.org/x/crypto/argon2/blake2b.go
 create mode 100644 vendor/golang.org/x/crypto/argon2/blamka_amd64.go
 create mode 100644 vendor/golang.org/x/crypto/argon2/blamka_amd64.s
 create mode 100644 vendor/golang.org/x/crypto/argon2/blamka_generic.go
 create mode 100644 vendor/golang.org/x/crypto/argon2/blamka_ref.go
 create mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b.go
 create mode 100644 vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go
 create mode 100644 vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
 create mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
 create mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b_generic.go
 create mode 100644 vendor/golang.org/x/crypto/blake2b/blake2b_ref.go
 create mode 100644 vendor/golang.org/x/crypto/blake2b/blake2x.go
 create mode 100644 vendor/golang.org/x/crypto/blake2b/register.go
 create mode 100644 vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
 create mode 100644 vendor/golang.org/x/crypto/scrypt/scrypt.go
 create mode 100644 vendor/golang.org/x/sys/LICENSE
 create mode 100644 vendor/golang.org/x/sys/PATENTS
 create mode 100644 vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
 create mode 100644 vendor/golang.org/x/sys/cpu/byteorder.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_aix.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_arm.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_arm64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_arm64.s
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_arm.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_loong64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_mips64x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_mipsx.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.s
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_other_arm.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_other_ppc64x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_riscv64.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_s390x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_s390x.s
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_wasm.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_x86.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_x86.s
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_zos.go
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_zos_s390x.go
 create mode 100644 vendor/golang.org/x/sys/cpu/endian_big.go
 create mode 100644 vendor/golang.org/x/sys/cpu/endian_little.go
 create mode 100644 vendor/golang.org/x/sys/cpu/hwcap_linux.go
 create mode 100644 vendor/golang.org/x/sys/cpu/parse.go
 create mode 100644 vendor/golang.org/x/sys/cpu/proc_cpuinfo_linux.go
 create mode 100644 vendor/golang.org/x/sys/cpu/runtime_auxv.go
 create mode 100644 vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go
 create mode 100644 vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
 create mode 100644 vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
 create mode 100644 vendor/modules.txt

diff --git a/vendor/git.autistici.org/ai3/go-common/pwhash/argon2.go b/vendor/git.autistici.org/ai3/go-common/pwhash/argon2.go
new file mode 100644
index 0000000..fef03ca
--- /dev/null
+++ b/vendor/git.autistici.org/ai3/go-common/pwhash/argon2.go
@@ -0,0 +1,221 @@
+package pwhash
+
+import (
+	"crypto/subtle"
+	"encoding/base64"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"log"
+	"strconv"
+	"strings"
+
+	"golang.org/x/crypto/argon2"
+)
+
+var (
+	argonKeyLen  uint32 = 32
+	argonSaltLen        = 16
+)
+
+// Argon2PasswordHash uses the Argon2 hashing algorithm.
+type argon2PasswordHash struct {
+	// Encoding parameters.
+	params argon2Params
+
+	// Codec for string encoding.
+	codec argon2Codec
+}
+
+// newArgon2PasswordHash returns an Argon2i-based PasswordHash using the
+// specified parameters for time, memory, and number of threads.
+func newArgon2PasswordHash(time, mem uint32, threads uint8, codec argon2Codec) PasswordHash {
+	return &argon2PasswordHash{
+		params: argon2Params{
+			Time:    time,
+			Memory:  mem,
+			Threads: threads,
+		},
+		codec: codec,
+	}
+}
+
+// NewArgon2 returns an Argon2i-based PasswordHash using the default parameters.
+func NewArgon2() PasswordHash {
+	return NewArgon2WithParams(
+		defaultArgon2Params.Time,
+		defaultArgon2Params.Memory,
+		defaultArgon2Params.Threads,
+	)
+}
+
+// NewArgon2WithParams returns an Argon2i-based PasswordHash using the
+// specified parameters for time, memory, and number of threads.
+func NewArgon2WithParams(time, mem uint32, threads uint8) PasswordHash {
+	return newArgon2PasswordHash(time, mem, threads, &a2Codec{})
+}
+
+// NewArgon2Std returns an Argon2i-based PasswordHash that conforms
+// to the reference implementation encoding, using default parameters.
+func NewArgon2Std() PasswordHash {
+	return NewArgon2StdWithParams(
+		defaultArgon2Params.Time,
+		defaultArgon2Params.Memory,
+		defaultArgon2Params.Threads,
+	)
+}
+
+// NewArgon2StdWithParams returns an Argon2i-based PasswordHash using
+// the specified parameters for time, memory, and number of
+// threads. This will use the string encoding ("$argon2$") documented
+// in the argon2 reference implementation.
+func NewArgon2StdWithParams(time, mem uint32, threads uint8) PasswordHash {
+	return newArgon2PasswordHash(time, mem, threads, &argon2StdCodec{})
+}
+
+// ComparePassword returns true if the given password matches the
+// encrypted one.
+func (s *argon2PasswordHash) ComparePassword(encrypted, password string) bool {
+	params, salt, dk, err := s.codec.decodeArgon2Hash(encrypted)
+	if err != nil {
+		return false
+	}
+	dk2 := argon2.Key([]byte(password), salt, params.Time, params.Memory, params.Threads, argonKeyLen)
+	return subtle.ConstantTimeCompare(dk, dk2) == 1
+}
+
+// Encrypt the given password with the Argon2 algorithm.
+func (s *argon2PasswordHash) Encrypt(password string) string {
+	salt := getRandomBytes(argonSaltLen)
+	dk := argon2.Key([]byte(password), salt, s.params.Time, s.params.Memory, s.params.Threads, argonKeyLen)
+	return s.codec.encodeArgon2Hash(s.params, salt, dk)
+}
+
+type argon2Params struct {
+	Time    uint32
+	Memory  uint32
+	Threads uint8
+}
+
+// Default Argon2 parameters are tuned for a high-traffic
+// authentication service (<1ms per operation).
+var defaultArgon2Params = argon2Params{
+	Time:    1,
+	Memory:  4 * 1024,
+	Threads: 4,
+}
+
+type argon2Codec interface {
+	encodeArgon2Hash(argon2Params, []byte, []byte) string
+	decodeArgon2Hash(string) (argon2Params, []byte, []byte, error)
+}
+
+type a2Codec struct{}
+
+func (*a2Codec) encodeArgon2Hash(params argon2Params, salt, dk []byte) string {
+	return fmt.Sprintf("$a2$%d$%d$%d$%x$%x", params.Time, params.Memory, params.Threads, salt, dk)
+}
+
+func (*a2Codec) decodeArgon2Hash(s string) (params argon2Params, salt []byte, dk []byte, err error) {
+	if !strings.HasPrefix(s, "$a2$") {
+		err = errors.New("not an Argon2 password hash")
+		return
+	}
+
+	parts := strings.SplitN(s[4:], "$", 5)
+	if len(parts) != 5 {
+		err = errors.New("bad encoding")
+		return
+	}
+
+	var i uint64
+
+	if i, err = strconv.ParseUint(parts[0], 10, 32); err != nil {
+		return
+	}
+	params.Time = uint32(i)
+
+	if i, err = strconv.ParseUint(parts[1], 10, 32); err != nil {
+		return
+	}
+	params.Memory = uint32(i)
+
+	if i, err = strconv.ParseUint(parts[2], 10, 8); err != nil {
+		return
+	}
+	params.Threads = uint8(i)
+
+	salt, err = hex.DecodeString(parts[3])
+	if err != nil {
+		return
+	}
+	dk, err = hex.DecodeString(parts[4])
+	return
+}
+
+type argon2StdCodec struct{}
+
+func (*argon2StdCodec) encodeArgon2Hash(params argon2Params, salt, dk []byte) string {
+	encSalt := base64.RawStdEncoding.EncodeToString(salt)
+	encDK := base64.RawStdEncoding.EncodeToString(dk)
+	return fmt.Sprintf("$argon2i$v=19$m=%d,t=%d,p=%d$%s$%s", params.Memory, params.Time, params.Threads, encSalt, encDK)
+}
+
+func parseArgon2HashParams(s string) (params argon2Params, err error) {
+	params = defaultArgon2Params
+	parts := strings.Split(s, ",")
+	for _, ss := range parts {
+		kv := strings.SplitN(ss, "=", 2)
+		if len(kv) != 2 {
+			err = errors.New("bad parameter encoding")
+			return
+		}
+		var i uint64
+		switch kv[0] {
+		case "t":
+			i, err = strconv.ParseUint(kv[1], 10, 32)
+			params.Time = uint32(i)
+		case "m":
+			i, err = strconv.ParseUint(kv[1], 10, 32)
+			params.Memory = uint32(i)
+		case "p":
+			i, err = strconv.ParseUint(kv[1], 10, 8)
+			params.Threads = uint8(i)
+		default:
+			err = errors.New("unknown parameter in hash")
+		}
+		if err != nil {
+			return
+		}
+	}
+	return
+}
+
+func (*argon2StdCodec) decodeArgon2Hash(s string) (params argon2Params, salt []byte, dk []byte, err error) {
+	if !strings.HasPrefix(s, "$argon2i$") {
+		err = errors.New("not an Argon2 password hash")
+		return
+	}
+
+	parts := strings.SplitN(s[9:], "$", 4)
+	if len(parts) != 4 {
+		err = errors.New("bad encoding")
+		return
+	}
+	if parts[0] != "v=19" {
+		err = errors.New("bad argon2 hash version")
+		return
+	}
+
+	params, err = parseArgon2HashParams(parts[1])
+	if err != nil {
+		return
+	}
+	if salt, err = base64.RawStdEncoding.DecodeString(parts[2]); err != nil {
+		return
+	}
+	dk, err = base64.RawStdEncoding.DecodeString(parts[3])
+
+	log.Printf("params: %+v", params)
+	return
+}
diff --git a/vendor/git.autistici.org/ai3/go-common/pwhash/crypt.go b/vendor/git.autistici.org/ai3/go-common/pwhash/crypt.go
new file mode 100644
index 0000000..3db643a
--- /dev/null
+++ b/vendor/git.autistici.org/ai3/go-common/pwhash/crypt.go
@@ -0,0 +1,39 @@
+package pwhash
+
+import (
+	"crypto/subtle"
+	"fmt"
+
+	"github.com/amoghe/go-crypt"
+)
+
+// systemCryptPasswordHash uses the glibc crypt function.
+type systemCryptPasswordHash struct {
+	hashStr string
+}
+
+// NewSystemCrypt returns a PasswordHash that uses the system crypt(3)
+// function, specifically glibc with its SHA512 algorithm.
+func NewSystemCrypt() PasswordHash {
+	return &systemCryptPasswordHash{"$6$"}
+}
+
+// ComparePassword returns true if the given password matches the
+// encrypted one.
+func (s *systemCryptPasswordHash) ComparePassword(encrypted, password string) bool {
+	enc2, err := crypt.Crypt(password, encrypted)
+	if err != nil {
+		return false
+	}
+	return subtle.ConstantTimeCompare([]byte(encrypted), []byte(enc2)) == 1
+}
+
+// Encrypt the given password using glibc crypt.
+func (s *systemCryptPasswordHash) Encrypt(password string) string {
+	salt := fmt.Sprintf("%s%x$", s.hashStr, getRandomBytes(16))
+	enc, err := crypt.Crypt(password, salt)
+	if err != nil {
+		panic(err)
+	}
+	return enc
+}
diff --git a/vendor/git.autistici.org/ai3/go-common/pwhash/password.go b/vendor/git.autistici.org/ai3/go-common/pwhash/password.go
new file mode 100644
index 0000000..07b093d
--- /dev/null
+++ b/vendor/git.autistici.org/ai3/go-common/pwhash/password.go
@@ -0,0 +1,82 @@
+// Package pwhash provides a simple interface to hashed passwords with
+// support for multiple hashing algorithms.
+//
+// The format is the well-known dollar-separated field string,
+// extended with optional algorithm-specific parameters:
+//
+//     $id[$params...]$salt$encrypted
+//
+// We extend 'id' beyond the values supported by the libc crypt(3)
+// function with the following hashing algorithms:
+//
+// Scrypt (id '$s$'), in which case the parameters are N, R and P.
+//
+// Argon2 (id '$a2$'), with parameters time, memory and threads. To
+// tune Argon2 parameters, you can run the benchmarks in this package:
+// the parameterized benchmarks are named with
+// time/memory(MB)/threads. For nicer results:
+//
+//     go test -bench=Argon2 -run=none . 2>&1 | \
+//         awk '/^Bench/ {ops=1000000000 / $3; print $1 " " ops " ops/sec"}'
+//
+package pwhash
+
+import (
+	"crypto/rand"
+	"io"
+	"strings"
+)
+
+// PasswordHash is the interface for a password hashing algorithm
+// implementation.
+type PasswordHash interface {
+	// ComparePassword returns true if the given password matches
+	// the encrypted one.
+	ComparePassword(string, string) bool
+
+	// Encrypt the given password.
+	Encrypt(string) string
+}
+
+func getRandomBytes(n int) []byte {
+	b := make([]byte, n)
+	_, err := io.ReadFull(rand.Reader, b[:])
+	if err != nil {
+		panic(err)
+	}
+	return b
+}
+
+// A registry of default handlers for decoding passwords.
+var prefixRegistry = map[string]PasswordHash{
+	"$1$":       NewSystemCrypt(),
+	"$5$":       NewSystemCrypt(),
+	"$6$":       NewSystemCrypt(),
+	"$s$":       NewScrypt(),
+	"$a2$":      NewArgon2(),
+	"$argon2i$": NewArgon2Std(),
+}
+
+// ComparePassword returns true if the given password matches the
+// encrypted one.
+func ComparePassword(encrypted, password string) bool {
+	for pfx, h := range prefixRegistry {
+		if strings.HasPrefix(encrypted, pfx) {
+			return h.ComparePassword(encrypted, password)
+		}
+	}
+	return false
+}
+
+// DefaultEncryptAlgorithm is used by the Encrypt function to encrypt
+// passwords.
+var DefaultEncryptAlgorithm PasswordHash
+
+func init() {
+	DefaultEncryptAlgorithm = NewArgon2()
+}
+
+// Encrypt will encrypt a password with the default algorithm.
+func Encrypt(password string) string {
+	return DefaultEncryptAlgorithm.Encrypt(password)
+}
diff --git a/vendor/git.autistici.org/ai3/go-common/pwhash/scrypt.go b/vendor/git.autistici.org/ai3/go-common/pwhash/scrypt.go
new file mode 100644
index 0000000..967bcec
--- /dev/null
+++ b/vendor/git.autistici.org/ai3/go-common/pwhash/scrypt.go
@@ -0,0 +1,116 @@
+package pwhash
+
+import (
+	"crypto/subtle"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"golang.org/x/crypto/scrypt"
+)
+
+var (
+	scryptKeyLen  = 32
+	scryptSaltLen = 16
+)
+
+// ScryptPasswordHash uses the scrypt hashing algorithm.
+type scryptPasswordHash struct {
+	params scryptParams
+}
+
+// NewScrypt returns a PasswordHash that uses the scrypt algorithm
+// with the default parameters.
+func NewScrypt() PasswordHash {
+	return NewScryptWithParams(
+		defaultScryptParams.N,
+		defaultScryptParams.R,
+		defaultScryptParams.P,
+	)
+}
+
+// NewScryptWithParams returns a PasswordHash that uses the scrypt
+// algorithm with the specified parameters.
+func NewScryptWithParams(n, r, p int) PasswordHash {
+	return &scryptPasswordHash{
+		params: scryptParams{
+			N: n,
+			R: r,
+			P: p,
+		},
+	}
+}
+
+// ComparePassword returns true if the given password matches
+// the encrypted one.
+func (s *scryptPasswordHash) ComparePassword(encrypted, password string) bool {
+	params, salt, dk, err := decodeScryptHash(encrypted)
+	if err != nil {
+		return false
+	}
+	dk2, err := scrypt.Key([]byte(password), salt, params.N, params.R, params.P, scryptKeyLen)
+	if err != nil {
+		return false
+	}
+	return subtle.ConstantTimeCompare(dk, dk2) == 1
+}
+
+// Encrypt the given password with the scrypt algorithm.
+func (s *scryptPasswordHash) Encrypt(password string) string {
+	salt := getRandomBytes(scryptSaltLen)
+
+	dk, err := scrypt.Key([]byte(password), salt, s.params.N, s.params.R, s.params.P, scryptKeyLen)
+	if err != nil {
+		panic(err)
+	}
+
+	return encodeScryptHash(s.params, salt, dk)
+}
+
+type scryptParams struct {
+	N int
+	R int
+	P int
+}
+
+var defaultScryptParams = scryptParams{
+	N: 16384,
+	R: 8,
+	P: 1,
+}
+
+func encodeScryptHash(params scryptParams, salt, dk []byte) string {
+	return fmt.Sprintf("$s$%d$%d$%d$%x$%x", params.N, params.R, params.P, salt, dk)
+}
+
+func decodeScryptHash(s string) (params scryptParams, salt []byte, dk []byte, err error) {
+	if !strings.HasPrefix(s, "$s$") {
+		err = errors.New("not a scrypt password hash")
+		return
+	}
+
+	parts := strings.SplitN(s[3:], "$", 5)
+	if len(parts) != 5 {
+		err = errors.New("bad encoding")
+		return
+	}
+
+	if params.N, err = strconv.Atoi(parts[0]); err != nil {
+		return
+	}
+
+	if params.R, err = strconv.Atoi(parts[1]); err != nil {
+		return
+	}
+	if params.P, err = strconv.Atoi(parts[2]); err != nil {
+		return
+	}
+
+	if salt, err = hex.DecodeString(parts[3]); err != nil {
+		return
+	}
+	dk, err = hex.DecodeString(parts[4])
+	return
+}
diff --git a/vendor/github.com/amoghe/go-crypt/.gitignore b/vendor/github.com/amoghe/go-crypt/.gitignore
new file mode 100644
index 0000000..daf913b
--- /dev/null
+++ b/vendor/github.com/amoghe/go-crypt/.gitignore
@@ -0,0 +1,24 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
diff --git a/vendor/github.com/amoghe/go-crypt/.travis.yml b/vendor/github.com/amoghe/go-crypt/.travis.yml
new file mode 100644
index 0000000..1114b03
--- /dev/null
+++ b/vendor/github.com/amoghe/go-crypt/.travis.yml
@@ -0,0 +1,5 @@
+sudo:     false
+language: go
+
+go:
+  - 1.4
diff --git a/vendor/github.com/amoghe/go-crypt/LICENSE b/vendor/github.com/amoghe/go-crypt/LICENSE
new file mode 100644
index 0000000..0451eb4
--- /dev/null
+++ b/vendor/github.com/amoghe/go-crypt/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Akshay Moghe
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/amoghe/go-crypt/README.md b/vendor/github.com/amoghe/go-crypt/README.md
new file mode 100644
index 0000000..c956ed3
--- /dev/null
+++ b/vendor/github.com/amoghe/go-crypt/README.md
@@ -0,0 +1,108 @@
+go-crypt (`crypt`)
+==================
+
+[![Build Status](https://travis-ci.org/amoghe/go-crypt.svg)](https://travis-ci.org/amoghe/go-crypt)
+
+Package `crypt` provides go language wrappers around crypt(3). For further information on crypt see the
+[man page](http://man7.org/linux/man-pages/man3/crypt.3.html)
+
+If you have questions about how to use crypt (the C function), it is likely this is not the package you
+are looking for.
+
+**NOTE** Depending on the platform, this package provides a `Crypt` function that is backed by different
+flavors of the libc crypt. This is done by detecting the GOOS and trying to build using `crypt_r` (the GNU
+extension) when on linux, and wrapping around plain 'ol `crypt` (guarded by a global lock) otherwise.
+
+Example
+-------
+```go
+import (
+	"fmt"
+	"github.com/amoghe/go-crypt"
+)
+
+func main() {
+	md5, err := crypt.Crypt("password", "in")
+	if err != nil {
+		fmt.Errorf("error:", err)
+		return
+	}
+
+	sha512, err := crypt.Crypt("password", "$6$SomeSaltSomePepper$")
+	if err != nil {
+		fmt.Errorf("error:", err)
+		return
+	}
+
+	fmt.Println("MD5:", md5)
+	fmt.Println("SHA512:", sha512)
+}
+```
+
+A Note On "Salt"
+----------------
+
+You can find out more about salt [here](https://en.wikipedia.org/wiki/Salt_(cryptography))
+
+The hash algorithm can be selected via the salt string. Here is how to do it (relevant
+section from the man page):
+
+```
+   If salt is a character string starting with the characters
+   "$id$" followed by a string terminated by "$":
+
+       $id$salt$encrypted
+
+   then instead of using the DES machine, id identifies the
+   encryption method used and this then determines how the rest
+   of the password string is interpreted.  The following values
+   of id are supported:
+
+          ID  | Method
+          ─────────────────────────────────────────────────────────
+          1   | MD5
+          2a  | Blowfish (not in mainline glibc; added in some
+              | Linux distributions)
+          5   | SHA-256 (since glibc 2.7)
+          6   | SHA-512 (since glibc 2.7)
+
+   So $5$salt$encrypted is an SHA-256 encoded password and
+   $6$salt$encrypted is an SHA-512 encoded one.
+
+   "salt" stands for the up to 16 characters following "$id$" in
+   the salt.  The encrypted part of the password string is the
+   actual computed password.  The size of this string is fixed:
+
+   MD5     | 22 characters
+   SHA-256 | 43 characters
+   SHA-512 | 86 characters
+```
+
+Platforms
+---------
+
+This package has been tested on the following platforms:
+- ubuntu 14.04.2 (libc 2.19)
+- ubuntu 12.04.5 (libc 2.15)
+- centos         (libc 2.17)
+- fedora 22      (libc 2.21)
+
+All the platforms tested on have GNU libc (with extensions) so that the GOOS=linux always
+compiles the reentrant versions of the crypt function (`crypt_r`), and exposes it to go land.
+
+Other platforms (freebsd, netbsd) should also work (in theory) since their libc expose at least
+a posix compliant crypt function. On these platforms the fallback should compile and expose the
+'plain' (non reentrant, thus globally locked) crypt function.
+
+Unfortunately, I do not have access to machines that run anything other than Linux, hence the other
+platforms have not been tested, however I believe they should work just fine. If you can verify this
+(or provide a patch that fixes this), I would be grateful.
+
+TODO
+----
+* Find someone with access to *BSD system(s)
+
+License
+-------
+
+Released under the [MIT License](LICENSE)
diff --git a/vendor/github.com/amoghe/go-crypt/crypt.go b/vendor/github.com/amoghe/go-crypt/crypt.go
new file mode 100644
index 0000000..aa81960
--- /dev/null
+++ b/vendor/github.com/amoghe/go-crypt/crypt.go
@@ -0,0 +1,59 @@
+// +build freebsd netbsd
+
+// Package crypt provides wrappers around functions available in crypt.h
+//
+// It wraps around the GNU specific extension (crypt) when the reentrant version
+// (crypt_r) is unavailable. The non-reentrant version is guarded by a global lock
+// so as to be safely callable from concurrent goroutines.
+package crypt
+
+import (
+	"sync"
+	"unsafe"
+)
+
+/*
+#cgo LDFLAGS: -lcrypt
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <unistd.h>
+*/
+import "C"
+
+var (
+	mu sync.Mutex
+)
+
+// Crypt provides a wrapper around the glibc crypt() function.
+// For the meaning of the arguments, refer to the README.
+func Crypt(pass, salt string) (string, error) {
+	c_pass := C.CString(pass)
+	defer C.free(unsafe.Pointer(c_pass))
+
+	c_salt := C.CString(salt)
+	defer C.free(unsafe.Pointer(c_salt))
+
+	mu.Lock()
+	c_enc, err := C.crypt(c_pass, c_salt)
+	mu.Unlock()
+
+	if c_enc == nil {
+		return "", err
+	}
+	defer C.free(unsafe.Pointer(c_enc))
+
+	// From the crypt(3) man-page. Upon error, crypt_r writes an invalid
+	// hashed passphrase to the output field of their data argument, and
+	// crypt writes an invalid hash to its static storage area. This
+	// string will be shorter than 13 characters, will begin with a ‘*’,
+	// and will not compare equal to setting.
+	hash := C.GoString(c_enc)
+	if len(hash) > 0 && hash[0] == '*' {
+		return "", err
+	}
+
+	// Return nil error if the string is non-nil.
+	// As per the errno.h manpage, functions are allowed to set errno
+	// on success. Caller should ignore errno on success.
+	return C.GoString(c_enc), err
+}
diff --git a/vendor/github.com/amoghe/go-crypt/crypt_r.go b/vendor/github.com/amoghe/go-crypt/crypt_r.go
new file mode 100644
index 0000000..22da04a
--- /dev/null
+++ b/vendor/github.com/amoghe/go-crypt/crypt_r.go
@@ -0,0 +1,78 @@
+// +build linux
+
+// Package crypt provides wrappers around functions available in crypt.h
+//
+// It wraps around the GNU specific extension (crypt_r) when it is available
+// (i.e. where GOOS=linux). This makes the go function reentrant (and thus
+// callable from concurrent goroutines).
+package crypt
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+/*
+#cgo LDFLAGS: -lcrypt
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <string.h>
+#include <crypt.h>
+
+char *gnu_ext_crypt(char *pass, char *salt) {
+  char *enc = NULL;
+  char *ret = NULL;
+  struct crypt_data data;
+  data.initialized = 0;
+
+  enc = crypt_r(pass, salt, &data);
+  if(enc == NULL) {
+    return NULL;
+  }
+
+  ret = (char *)malloc(strlen(enc)+1); // for trailing null
+  strncpy(ret, enc, strlen(enc));
+  ret[strlen(enc)]= '\0'; // paranoid
+
+  return ret;
+}
+*/
+import "C"
+
+// Crypt provides a wrapper around the glibc crypt_r() function.
+// For the meaning of the arguments, refer to the package README.
+func Crypt(pass, salt string) (string, error) {
+	c_pass := C.CString(pass)
+	defer C.free(unsafe.Pointer(c_pass))
+
+	c_salt := C.CString(salt)
+	defer C.free(unsafe.Pointer(c_salt))
+
+	c_enc, err := C.gnu_ext_crypt(c_pass, c_salt)
+	if c_enc == nil {
+		return "", err
+	}
+	defer C.free(unsafe.Pointer(c_enc))
+
+	// From the crypt(3) man-page. Upon error, crypt_r writes an invalid
+	// hashed passphrase to the output field of their data argument, and
+	// crypt writes an invalid hash to its static storage area. This
+	// string will be shorter than 13 characters, will begin with a ‘*’,
+	// and will not compare equal to setting.
+	hash := C.GoString(c_enc)
+	if len(hash) > 0 && hash[0] == '*' {
+		// Make sure we acutally return an error, musl e.g. does not
+		// set errno in all cases here.
+		if err == nil {
+			err = syscall.EINVAL
+		}
+		return "", err
+	}
+
+	// Return nil error if the string is non-nil.
+	// As per the errno.h manpage, functions are allowed to set errno
+	// on success. Caller should ignore errno on success.
+	return hash, nil
+}
diff --git a/vendor/github.com/amoghe/go-crypt/crypt_unsupported.go b/vendor/github.com/amoghe/go-crypt/crypt_unsupported.go
new file mode 100644
index 0000000..d4a3951
--- /dev/null
+++ b/vendor/github.com/amoghe/go-crypt/crypt_unsupported.go
@@ -0,0 +1,17 @@
+// +build darwin windows
+
+// Package crypt provides wrappers around functions available in crypt.h
+//
+// It wraps around the GNU specific extension (crypt) when the reentrant version
+// (crypt_r) is unavailable. The non-reentrant version is guarded by a global lock
+// so as to be safely callable from concurrent goroutines.
+package crypt
+
+import (
+	"errors"
+)
+
+// Crypt does currently not provide an implementation for windows and darwin
+func Crypt(pass, salt string) (string, error) {
+	return "", errors.New("unsupported platform")
+}
diff --git a/vendor/github.com/amoghe/go-crypt/version.go b/vendor/github.com/amoghe/go-crypt/version.go
new file mode 100644
index 0000000..9221286
--- /dev/null
+++ b/vendor/github.com/amoghe/go-crypt/version.go
@@ -0,0 +1,25 @@
+// +build linux
+
+package crypt
+
+/*
+#include <features.h>
+#ifdef __GLIBC__
+#include <gnu/libc-version.h>
+unsigned int get_glibc_minor_version(void) {
+	return __GLIBC_MINOR__;
+}
+#else
+unsigned int get_glibc_minor_version(void) {
+	return 0;
+}
+#endif
+*/
+import "C"
+
+// This function is specific to the tests. It basically checks if
+// we're running with glibc and if the used glibc is new enough
+func checkGlibCVersion() bool {
+	c_minor := C.get_glibc_minor_version()
+	return c_minor >= 17
+}
diff --git a/vendor/github.com/coreos/go-systemd/LICENSE b/vendor/github.com/coreos/go-systemd/LICENSE
new file mode 100644
index 0000000..37ec93a
--- /dev/null
+++ b/vendor/github.com/coreos/go-systemd/LICENSE
@@ -0,0 +1,191 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the copyright
+owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to software source code, documentation source, and configuration
+files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object code,
+generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form, made
+available under the License, as indicated by a copyright notice that is included
+in or attached to the work (an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) to the interfaces of, the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original version
+of the Work and any modifications or additions to that Work or Derivative Works
+thereof, that is intentionally submitted to Licensor for inclusion in the Work
+by the copyright owner or by an individual or Legal Entity authorized to submit
+on behalf of the copyright owner. For the purposes of this definition,
+"submitted" means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems, and
+issue tracking systems that are managed by, or on behalf of, the Licensor for
+the purpose of discussing and improving the Work, but excluding communication
+that is conspicuously marked or otherwise designated in writing by the copyright
+owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of Copyright License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the Work and such
+Derivative Works in Source or Object form.
+
+3. Grant of Patent License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable (except as stated in this section) patent license to make, have
+made, use, offer to sell, sell, import, and otherwise transfer the Work, where
+such license applies only to those patent claims licensable by such Contributor
+that are necessarily infringed by their Contribution(s) alone or by combination
+of their Contribution(s) with the Work to which such Contribution(s) was
+submitted. If You institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work or a
+Contribution incorporated within the Work constitutes direct or contributory
+patent infringement, then any patent licenses granted to You under this License
+for that Work shall terminate as of the date such litigation is filed.
+
+4. Redistribution.
+
+You may reproduce and distribute copies of the Work or Derivative Works thereof
+in any medium, with or without modifications, and in Source or Object form,
+provided that You meet the following conditions:
+
+You must give any other recipients of the Work or Derivative Works a copy of
+this License; and
+You must cause any modified files to carry prominent notices stating that You
+changed the files; and
+You must retain, in the Source form of any Derivative Works that You distribute,
+all copyright, patent, trademark, and attribution notices from the Source form
+of the Work, excluding those notices that do not pertain to any part of the
+Derivative Works; and
+If the Work includes a "NOTICE" text file as part of its distribution, then any
+Derivative Works that You distribute must include a readable copy of the
+attribution notices contained within such NOTICE file, excluding those notices
+that do not pertain to any part of the Derivative Works, in at least one of the
+following places: within a NOTICE text file distributed as part of the
+Derivative Works; within the Source form or documentation, if provided along
+with the Derivative Works; or, within a display generated by the Derivative
+Works, if and wherever such third-party notices normally appear. The contents of
+the NOTICE file are for informational purposes only and do not modify the
+License. You may add Your own attribution notices within Derivative Works that
+You distribute, alongside or as an addendum to the NOTICE text from the Work,
+provided that such additional attribution notices cannot be construed as
+modifying the License.
+You may add Your own copyright statement to Your modifications and may provide
+additional or different license terms and conditions for use, reproduction, or
+distribution of Your modifications, or for any such Derivative Works as a whole,
+provided Your use, reproduction, and distribution of the Work otherwise complies
+with the conditions stated in this License.
+
+5. Submission of Contributions.
+
+Unless You explicitly state otherwise, any Contribution intentionally submitted
+for inclusion in the Work by You to the Licensor shall be under the terms and
+conditions of this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify the terms of
+any separate license agreement you may have executed with Licensor regarding
+such Contributions.
+
+6. Trademarks.
+
+This License does not grant permission to use the trade names, trademarks,
+service marks, or product names of the Licensor, except as required for
+reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty.
+
+Unless required by applicable law or agreed to in writing, Licensor provides the
+Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
+including, without limitation, any warranties or conditions of TITLE,
+NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
+solely responsible for determining the appropriateness of using or
+redistributing the Work and assume any risks associated with Your exercise of
+permissions under this License.
+
+8. Limitation of Liability.
+
+In no event and under no legal theory, whether in tort (including negligence),
+contract, or otherwise, unless required by applicable law (such as deliberate
+and grossly negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special, incidental,
+or consequential damages of any character arising as a result of this License or
+out of the use or inability to use the Work (including but not limited to
+damages for loss of goodwill, work stoppage, computer failure or malfunction, or
+any and all other commercial damages or losses), even if such Contributor has
+been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability.
+
+While redistributing the Work or Derivative Works thereof, You may choose to
+offer, and charge a fee for, acceptance of support, warranty, indemnity, or
+other liability obligations and/or rights consistent with this License. However,
+in accepting such obligations, You may act only on Your own behalf and on Your
+sole responsibility, not on behalf of any other Contributor, and only if You
+agree to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work
+
+To apply the Apache License to your work, attach the following boilerplate
+notice, with the fields enclosed by brackets "[]" replaced with your own
+identifying information. (Don't include the brackets!) The text should be
+enclosed in the appropriate comment syntax for the file format. We also
+recommend that a file or class name and description of purpose be included on
+the same "printed page" as the copyright notice for easier identification within
+third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/vendor/github.com/coreos/go-systemd/NOTICE b/vendor/github.com/coreos/go-systemd/NOTICE
new file mode 100644
index 0000000..23a0ada
--- /dev/null
+++ b/vendor/github.com/coreos/go-systemd/NOTICE
@@ -0,0 +1,5 @@
+CoreOS Project
+Copyright 2018 CoreOS, Inc
+
+This product includes software developed at CoreOS, Inc.
+(http://www.coreos.com/).
diff --git a/vendor/github.com/coreos/go-systemd/activation/files.go b/vendor/github.com/coreos/go-systemd/activation/files.go
new file mode 100644
index 0000000..29dd18d
--- /dev/null
+++ b/vendor/github.com/coreos/go-systemd/activation/files.go
@@ -0,0 +1,67 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package activation implements primitives for systemd socket activation.
+package activation
+
+import (
+	"os"
+	"strconv"
+	"strings"
+	"syscall"
+)
+
+const (
+	// listenFdsStart corresponds to `SD_LISTEN_FDS_START`.
+	listenFdsStart = 3
+)
+
+// Files returns a slice containing a `os.File` object for each
+// file descriptor passed to this process via systemd fd-passing protocol.
+//
+// The order of the file descriptors is preserved in the returned slice.
+// `unsetEnv` is typically set to `true` in order to avoid clashes in
+// fd usage and to avoid leaking environment flags to child processes.
+func Files(unsetEnv bool) []*os.File {
+	if unsetEnv {
+		defer os.Unsetenv("LISTEN_PID")
+		defer os.Unsetenv("LISTEN_FDS")
+		defer os.Unsetenv("LISTEN_FDNAMES")
+	}
+
+	pid, err := strconv.Atoi(os.Getenv("LISTEN_PID"))
+	if err != nil || pid != os.Getpid() {
+		return nil
+	}
+
+	nfds, err := strconv.Atoi(os.Getenv("LISTEN_FDS"))
+	if err != nil || nfds == 0 {
+		return nil
+	}
+
+	names := strings.Split(os.Getenv("LISTEN_FDNAMES"), ":")
+
+	files := make([]*os.File, 0, nfds)
+	for fd := listenFdsStart; fd < listenFdsStart+nfds; fd++ {
+		syscall.CloseOnExec(fd)
+		name := "LISTEN_FD_" + strconv.Itoa(fd)
+		offset := fd - listenFdsStart
+		if offset < len(names) && len(names[offset]) > 0 {
+			name = names[offset]
+		}
+		files = append(files, os.NewFile(uintptr(fd), name))
+	}
+
+	return files
+}
diff --git a/vendor/github.com/coreos/go-systemd/activation/listeners.go b/vendor/github.com/coreos/go-systemd/activation/listeners.go
new file mode 100644
index 0000000..3dbe2b0
--- /dev/null
+++ b/vendor/github.com/coreos/go-systemd/activation/listeners.go
@@ -0,0 +1,103 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package activation
+
+import (
+	"crypto/tls"
+	"net"
+)
+
+// Listeners returns a slice containing a net.Listener for each matching socket type
+// passed to this process.
+//
+// The order of the file descriptors is preserved in the returned slice.
+// Nil values are used to fill any gaps. For example if systemd were to return file descriptors
+// corresponding with "udp, tcp, tcp", then the slice would contain {nil, net.Listener, net.Listener}
+func Listeners() ([]net.Listener, error) {
+	files := Files(true)
+	listeners := make([]net.Listener, len(files))
+
+	for i, f := range files {
+		if pc, err := net.FileListener(f); err == nil {
+			listeners[i] = pc
+			f.Close()
+		}
+	}
+	return listeners, nil
+}
+
+// ListenersWithNames maps a listener name to a set of net.Listener instances.
+func ListenersWithNames() (map[string][]net.Listener, error) {
+	files := Files(true)
+	listeners := map[string][]net.Listener{}
+
+	for _, f := range files {
+		if pc, err := net.FileListener(f); err == nil {
+			current, ok := listeners[f.Name()]
+			if !ok {
+				listeners[f.Name()] = []net.Listener{pc}
+			} else {
+				listeners[f.Name()] = append(current, pc)
+			}
+			f.Close()
+		}
+	}
+	return listeners, nil
+}
+
+// TLSListeners returns a slice containing a net.listener for each matching TCP socket type
+// passed to this process.
+// It uses default Listeners func and forces TCP sockets handlers to use TLS based on tlsConfig.
+func TLSListeners(tlsConfig *tls.Config) ([]net.Listener, error) {
+	listeners, err := Listeners()
+
+	if listeners == nil || err != nil {
+		return nil, err
+	}
+
+	if tlsConfig != nil {
+		for i, l := range listeners {
+			// Activate TLS only for TCP sockets
+			if l.Addr().Network() == "tcp" {
+				listeners[i] = tls.NewListener(l, tlsConfig)
+			}
+		}
+	}
+
+	return listeners, err
+}
+
+// TLSListenersWithNames maps a listener name to a net.Listener with
+// the associated TLS configuration.
+func TLSListenersWithNames(tlsConfig *tls.Config) (map[string][]net.Listener, error) {
+	listeners, err := ListenersWithNames()
+
+	if listeners == nil || err != nil {
+		return nil, err
+	}
+
+	if tlsConfig != nil {
+		for _, ll := range listeners {
+			// Activate TLS only for TCP sockets
+			for i, l := range ll {
+				if l.Addr().Network() == "tcp" {
+					ll[i] = tls.NewListener(l, tlsConfig)
+				}
+			}
+		}
+	}
+
+	return listeners, err
+}
diff --git a/vendor/github.com/coreos/go-systemd/activation/packetconns.go b/vendor/github.com/coreos/go-systemd/activation/packetconns.go
new file mode 100644
index 0000000..a972067
--- /dev/null
+++ b/vendor/github.com/coreos/go-systemd/activation/packetconns.go
@@ -0,0 +1,38 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package activation
+
+import (
+	"net"
+)
+
+// PacketConns returns a slice containing a net.PacketConn for each matching socket type
+// passed to this process.
+//
+// The order of the file descriptors is preserved in the returned slice.
+// Nil values are used to fill any gaps. For example if systemd were to return file descriptors
+// corresponding with "udp, tcp, udp", then the slice would contain {net.PacketConn, nil, net.PacketConn}
+func PacketConns() ([]net.PacketConn, error) {
+	files := Files(true)
+	conns := make([]net.PacketConn, len(files))
+
+	for i, f := range files {
+		if pc, err := net.FilePacketConn(f); err == nil {
+			conns[i] = pc
+			f.Close()
+		}
+	}
+	return conns, nil
+}
diff --git a/vendor/golang.org/x/crypto/LICENSE b/vendor/golang.org/x/crypto/LICENSE
new file mode 100644
index 0000000..2a7cf70
--- /dev/null
+++ b/vendor/golang.org/x/crypto/LICENSE
@@ -0,0 +1,27 @@
+Copyright 2009 The Go Authors.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google LLC nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/crypto/PATENTS b/vendor/golang.org/x/crypto/PATENTS
new file mode 100644
index 0000000..7330990
--- /dev/null
+++ b/vendor/golang.org/x/crypto/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the Go project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of Go, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of Go.  This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation.  If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of Go or any code incorporated within this
+implementation of Go constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of Go
+shall terminate as of the date such litigation is filed.
diff --git a/vendor/golang.org/x/crypto/argon2/argon2.go b/vendor/golang.org/x/crypto/argon2/argon2.go
new file mode 100644
index 0000000..29f0a2d
--- /dev/null
+++ b/vendor/golang.org/x/crypto/argon2/argon2.go
@@ -0,0 +1,283 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package argon2 implements the key derivation function Argon2.
+// Argon2 was selected as the winner of the Password Hashing Competition and can
+// be used to derive cryptographic keys from passwords.
+//
+// For a detailed specification of Argon2 see [1].
+//
+// If you aren't sure which function you need, use Argon2id (IDKey) and
+// the parameter recommendations for your scenario.
+//
+// # Argon2i
+//
+// Argon2i (implemented by Key) is the side-channel resistant version of Argon2.
+// It uses data-independent memory access, which is preferred for password
+// hashing and password-based key derivation. Argon2i requires more passes over
+// memory than Argon2id to protect from trade-off attacks. The recommended
+// parameters (taken from [2]) for non-interactive operations are time=3 and to
+// use the maximum available memory.
+//
+// # Argon2id
+//
+// Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining
+// Argon2i and Argon2d. It uses data-independent memory access for the first
+// half of the first iteration over the memory and data-dependent memory access
+// for the rest. Argon2id is side-channel resistant and provides better brute-
+// force cost savings due to time-memory tradeoffs than Argon2i. The recommended
+// parameters for non-interactive operations (taken from [2]) are time=1 and to
+// use the maximum available memory.
+//
+// [1] https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf
+// [2] https://tools.ietf.org/html/draft-irtf-cfrg-argon2-03#section-9.3
+package argon2
+
+import (
+	"encoding/binary"
+	"sync"
+
+	"golang.org/x/crypto/blake2b"
+)
+
+// The Argon2 version implemented by this package.
+const Version = 0x13
+
+const (
+	argon2d = iota
+	argon2i
+	argon2id
+)
+
+// Key derives a key from the password, salt, and cost parameters using Argon2i
+// returning a byte slice of length keyLen that can be used as cryptographic
+// key. The CPU cost and parallelism degree must be greater than zero.
+//
+// For example, you can get a derived key for e.g. AES-256 (which needs a
+// 32-byte key) by doing:
+//
+//	key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32)
+//
+// The draft RFC recommends[2] time=3, and memory=32*1024 is a sensible number.
+// If using that amount of memory (32 MB) is not possible in some contexts then
+// the time parameter can be increased to compensate.
+//
+// The time parameter specifies the number of passes over the memory and the
+// memory parameter specifies the size of the memory in KiB. For example
+// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be
+// adjusted to the number of available CPUs. The cost parameters should be
+// increased as memory latency and CPU parallelism increases. Remember to get a
+// good random salt.
+func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
+	return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen)
+}
+
+// IDKey derives a key from the password, salt, and cost parameters using
+// Argon2id returning a byte slice of length keyLen that can be used as
+// cryptographic key. The CPU cost and parallelism degree must be greater than
+// zero.
+//
+// For example, you can get a derived key for e.g. AES-256 (which needs a
+// 32-byte key) by doing:
+//
+//	key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32)
+//
+// The draft RFC recommends[2] time=1, and memory=64*1024 is a sensible number.
+// If using that amount of memory (64 MB) is not possible in some contexts then
+// the time parameter can be increased to compensate.
+//
+// The time parameter specifies the number of passes over the memory and the
+// memory parameter specifies the size of the memory in KiB. For example
+// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be
+// adjusted to the numbers of available CPUs. The cost parameters should be
+// increased as memory latency and CPU parallelism increases. Remember to get a
+// good random salt.
+func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
+	return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen)
+}
+
+func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
+	if time < 1 {
+		panic("argon2: number of rounds too small")
+	}
+	if threads < 1 {
+		panic("argon2: parallelism degree too low")
+	}
+	h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
+
+	memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
+	if memory < 2*syncPoints*uint32(threads) {
+		memory = 2 * syncPoints * uint32(threads)
+	}
+	B := initBlocks(&h0, memory, uint32(threads))
+	processBlocks(B, time, memory, uint32(threads), mode)
+	return extractKey(B, memory, uint32(threads), keyLen)
+}
+
+const (
+	blockLength = 128
+	syncPoints  = 4
+)
+
+type block [blockLength]uint64
+
+func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte {
+	var (
+		h0     [blake2b.Size + 8]byte
+		params [24]byte
+		tmp    [4]byte
+	)
+
+	b2, _ := blake2b.New512(nil)
+	binary.LittleEndian.PutUint32(params[0:4], threads)
+	binary.LittleEndian.PutUint32(params[4:8], keyLen)
+	binary.LittleEndian.PutUint32(params[8:12], memory)
+	binary.LittleEndian.PutUint32(params[12:16], time)
+	binary.LittleEndian.PutUint32(params[16:20], uint32(Version))
+	binary.LittleEndian.PutUint32(params[20:24], uint32(mode))
+	b2.Write(params[:])
+	binary.LittleEndian.PutUint32(tmp[:], uint32(len(password)))
+	b2.Write(tmp[:])
+	b2.Write(password)
+	binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt)))
+	b2.Write(tmp[:])
+	b2.Write(salt)
+	binary.LittleEndian.PutUint32(tmp[:], uint32(len(key)))
+	b2.Write(tmp[:])
+	b2.Write(key)
+	binary.LittleEndian.PutUint32(tmp[:], uint32(len(data)))
+	b2.Write(tmp[:])
+	b2.Write(data)
+	b2.Sum(h0[:0])
+	return h0
+}
+
+func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block {
+	var block0 [1024]byte
+	B := make([]block, memory)
+	for lane := uint32(0); lane < threads; lane++ {
+		j := lane * (memory / threads)
+		binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane)
+
+		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0)
+		blake2bHash(block0[:], h0[:])
+		for i := range B[j+0] {
+			B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:])
+		}
+
+		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1)
+		blake2bHash(block0[:], h0[:])
+		for i := range B[j+1] {
+			B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:])
+		}
+	}
+	return B
+}
+
+func processBlocks(B []block, time, memory, threads uint32, mode int) {
+	lanes := memory / threads
+	segments := lanes / syncPoints
+
+	processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) {
+		var addresses, in, zero block
+		if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
+			in[0] = uint64(n)
+			in[1] = uint64(lane)
+			in[2] = uint64(slice)
+			in[3] = uint64(memory)
+			in[4] = uint64(time)
+			in[5] = uint64(mode)
+		}
+
+		index := uint32(0)
+		if n == 0 && slice == 0 {
+			index = 2 // we have already generated the first two blocks
+			if mode == argon2i || mode == argon2id {
+				in[6]++
+				processBlock(&addresses, &in, &zero)
+				processBlock(&addresses, &addresses, &zero)
+			}
+		}
+
+		offset := lane*lanes + slice*segments + index
+		var random uint64
+		for index < segments {
+			prev := offset - 1
+			if index == 0 && slice == 0 {
+				prev += lanes // last block in lane
+			}
+			if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
+				if index%blockLength == 0 {
+					in[6]++
+					processBlock(&addresses, &in, &zero)
+					processBlock(&addresses, &addresses, &zero)
+				}
+				random = addresses[index%blockLength]
+			} else {
+				random = B[prev][0]
+			}
+			newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index)
+			processBlockXOR(&B[offset], &B[prev], &B[newOffset])
+			index, offset = index+1, offset+1
+		}
+		wg.Done()
+	}
+
+	for n := uint32(0); n < time; n++ {
+		for slice := uint32(0); slice < syncPoints; slice++ {
+			var wg sync.WaitGroup
+			for lane := uint32(0); lane < threads; lane++ {
+				wg.Add(1)
+				go processSegment(n, slice, lane, &wg)
+			}
+			wg.Wait()
+		}
+	}
+
+}
+
+func extractKey(B []block, memory, threads, keyLen uint32) []byte {
+	lanes := memory / threads
+	for lane := uint32(0); lane < threads-1; lane++ {
+		for i, v := range B[(lane*lanes)+lanes-1] {
+			B[memory-1][i] ^= v
+		}
+	}
+
+	var block [1024]byte
+	for i, v := range B[memory-1] {
+		binary.LittleEndian.PutUint64(block[i*8:], v)
+	}
+	key := make([]byte, keyLen)
+	blake2bHash(key, block[:])
+	return key
+}
+
+func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 {
+	refLane := uint32(rand>>32) % threads
+	if n == 0 && slice == 0 {
+		refLane = lane
+	}
+	m, s := 3*segments, ((slice+1)%syncPoints)*segments
+	if lane == refLane {
+		m += index
+	}
+	if n == 0 {
+		m, s = slice*segments, 0
+		if slice == 0 || lane == refLane {
+			m += index
+		}
+	}
+	if index == 0 || lane == refLane {
+		m--
+	}
+	return phi(rand, uint64(m), uint64(s), refLane, lanes)
+}
+
+func phi(rand, m, s uint64, lane, lanes uint32) uint32 {
+	p := rand & 0xFFFFFFFF
+	p = (p * p) >> 32
+	p = (p * m) >> 32
+	return lane*lanes + uint32((s+m-(p+1))%uint64(lanes))
+}
diff --git a/vendor/golang.org/x/crypto/argon2/blake2b.go b/vendor/golang.org/x/crypto/argon2/blake2b.go
new file mode 100644
index 0000000..10f4694
--- /dev/null
+++ b/vendor/golang.org/x/crypto/argon2/blake2b.go
@@ -0,0 +1,53 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package argon2
+
+import (
+	"encoding/binary"
+	"hash"
+
+	"golang.org/x/crypto/blake2b"
+)
+
+// blake2bHash computes an arbitrary long hash value of in
+// and writes the hash to out.
+func blake2bHash(out []byte, in []byte) {
+	var b2 hash.Hash
+	if n := len(out); n < blake2b.Size {
+		b2, _ = blake2b.New(n, nil)
+	} else {
+		b2, _ = blake2b.New512(nil)
+	}
+
+	var buffer [blake2b.Size]byte
+	binary.LittleEndian.PutUint32(buffer[:4], uint32(len(out)))
+	b2.Write(buffer[:4])
+	b2.Write(in)
+
+	if len(out) <= blake2b.Size {
+		b2.Sum(out[:0])
+		return
+	}
+
+	outLen := len(out)
+	b2.Sum(buffer[:0])
+	b2.Reset()
+	copy(out, buffer[:32])
+	out = out[32:]
+	for len(out) > blake2b.Size {
+		b2.Write(buffer[:])
+		b2.Sum(buffer[:0])
+		copy(out, buffer[:32])
+		out = out[32:]
+		b2.Reset()
+	}
+
+	if outLen%blake2b.Size > 0 { // outLen > 64
+		r := ((outLen + 31) / 32) - 2 // ⌈τ /32⌉-2
+		b2, _ = blake2b.New(outLen-32*r, nil)
+	}
+	b2.Write(buffer[:])
+	b2.Sum(out[:0])
+}
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.go b/vendor/golang.org/x/crypto/argon2/blamka_amd64.go
new file mode 100644
index 0000000..063e778
--- /dev/null
+++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.go
@@ -0,0 +1,60 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 && gc && !purego
+
+package argon2
+
+import "golang.org/x/sys/cpu"
+
+func init() {
+	useSSE4 = cpu.X86.HasSSE41
+}
+
+//go:noescape
+func mixBlocksSSE2(out, a, b, c *block)
+
+//go:noescape
+func xorBlocksSSE2(out, a, b, c *block)
+
+//go:noescape
+func blamkaSSE4(b *block)
+
+func processBlockSSE(out, in1, in2 *block, xor bool) {
+	var t block
+	mixBlocksSSE2(&t, in1, in2, &t)
+	if useSSE4 {
+		blamkaSSE4(&t)
+	} else {
+		for i := 0; i < blockLength; i += 16 {
+			blamkaGeneric(
+				&t[i+0], &t[i+1], &t[i+2], &t[i+3],
+				&t[i+4], &t[i+5], &t[i+6], &t[i+7],
+				&t[i+8], &t[i+9], &t[i+10], &t[i+11],
+				&t[i+12], &t[i+13], &t[i+14], &t[i+15],
+			)
+		}
+		for i := 0; i < blockLength/8; i += 2 {
+			blamkaGeneric(
+				&t[i], &t[i+1], &t[16+i], &t[16+i+1],
+				&t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1],
+				&t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1],
+				&t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1],
+			)
+		}
+	}
+	if xor {
+		xorBlocksSSE2(out, in1, in2, &t)
+	} else {
+		mixBlocksSSE2(out, in1, in2, &t)
+	}
+}
+
+func processBlock(out, in1, in2 *block) {
+	processBlockSSE(out, in1, in2, false)
+}
+
+func processBlockXOR(out, in1, in2 *block) {
+	processBlockSSE(out, in1, in2, true)
+}
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
new file mode 100644
index 0000000..c389547
--- /dev/null
+++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
@@ -0,0 +1,2791 @@
+// Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT.
+
+//go:build amd64 && gc && !purego
+
+#include "textflag.h"
+
+// func blamkaSSE4(b *block)
+// Requires: SSE2, SSSE3
+TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8
+	MOVQ       b+0(FP), AX
+	MOVOU      ·c40<>+0(SB), X10
+	MOVOU      ·c48<>+0(SB), X11
+	MOVOU      (AX), X0
+	MOVOU      16(AX), X1
+	MOVOU      32(AX), X2
+	MOVOU      48(AX), X3
+	MOVOU      64(AX), X4
+	MOVOU      80(AX), X5
+	MOVOU      96(AX), X6
+	MOVOU      112(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, (AX)
+	MOVOU      X1, 16(AX)
+	MOVOU      X2, 32(AX)
+	MOVOU      X3, 48(AX)
+	MOVOU      X4, 64(AX)
+	MOVOU      X5, 80(AX)
+	MOVOU      X6, 96(AX)
+	MOVOU      X7, 112(AX)
+	MOVOU      128(AX), X0
+	MOVOU      144(AX), X1
+	MOVOU      160(AX), X2
+	MOVOU      176(AX), X3
+	MOVOU      192(AX), X4
+	MOVOU      208(AX), X5
+	MOVOU      224(AX), X6
+	MOVOU      240(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 128(AX)
+	MOVOU      X1, 144(AX)
+	MOVOU      X2, 160(AX)
+	MOVOU      X3, 176(AX)
+	MOVOU      X4, 192(AX)
+	MOVOU      X5, 208(AX)
+	MOVOU      X6, 224(AX)
+	MOVOU      X7, 240(AX)
+	MOVOU      256(AX), X0
+	MOVOU      272(AX), X1
+	MOVOU      288(AX), X2
+	MOVOU      304(AX), X3
+	MOVOU      320(AX), X4
+	MOVOU      336(AX), X5
+	MOVOU      352(AX), X6
+	MOVOU      368(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 256(AX)
+	MOVOU      X1, 272(AX)
+	MOVOU      X2, 288(AX)
+	MOVOU      X3, 304(AX)
+	MOVOU      X4, 320(AX)
+	MOVOU      X5, 336(AX)
+	MOVOU      X6, 352(AX)
+	MOVOU      X7, 368(AX)
+	MOVOU      384(AX), X0
+	MOVOU      400(AX), X1
+	MOVOU      416(AX), X2
+	MOVOU      432(AX), X3
+	MOVOU      448(AX), X4
+	MOVOU      464(AX), X5
+	MOVOU      480(AX), X6
+	MOVOU      496(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 384(AX)
+	MOVOU      X1, 400(AX)
+	MOVOU      X2, 416(AX)
+	MOVOU      X3, 432(AX)
+	MOVOU      X4, 448(AX)
+	MOVOU      X5, 464(AX)
+	MOVOU      X6, 480(AX)
+	MOVOU      X7, 496(AX)
+	MOVOU      512(AX), X0
+	MOVOU      528(AX), X1
+	MOVOU      544(AX), X2
+	MOVOU      560(AX), X3
+	MOVOU      576(AX), X4
+	MOVOU      592(AX), X5
+	MOVOU      608(AX), X6
+	MOVOU      624(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 512(AX)
+	MOVOU      X1, 528(AX)
+	MOVOU      X2, 544(AX)
+	MOVOU      X3, 560(AX)
+	MOVOU      X4, 576(AX)
+	MOVOU      X5, 592(AX)
+	MOVOU      X6, 608(AX)
+	MOVOU      X7, 624(AX)
+	MOVOU      640(AX), X0
+	MOVOU      656(AX), X1
+	MOVOU      672(AX), X2
+	MOVOU      688(AX), X3
+	MOVOU      704(AX), X4
+	MOVOU      720(AX), X5
+	MOVOU      736(AX), X6
+	MOVOU      752(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 640(AX)
+	MOVOU      X1, 656(AX)
+	MOVOU      X2, 672(AX)
+	MOVOU      X3, 688(AX)
+	MOVOU      X4, 704(AX)
+	MOVOU      X5, 720(AX)
+	MOVOU      X6, 736(AX)
+	MOVOU      X7, 752(AX)
+	MOVOU      768(AX), X0
+	MOVOU      784(AX), X1
+	MOVOU      800(AX), X2
+	MOVOU      816(AX), X3
+	MOVOU      832(AX), X4
+	MOVOU      848(AX), X5
+	MOVOU      864(AX), X6
+	MOVOU      880(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 768(AX)
+	MOVOU      X1, 784(AX)
+	MOVOU      X2, 800(AX)
+	MOVOU      X3, 816(AX)
+	MOVOU      X4, 832(AX)
+	MOVOU      X5, 848(AX)
+	MOVOU      X6, 864(AX)
+	MOVOU      X7, 880(AX)
+	MOVOU      896(AX), X0
+	MOVOU      912(AX), X1
+	MOVOU      928(AX), X2
+	MOVOU      944(AX), X3
+	MOVOU      960(AX), X4
+	MOVOU      976(AX), X5
+	MOVOU      992(AX), X6
+	MOVOU      1008(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 896(AX)
+	MOVOU      X1, 912(AX)
+	MOVOU      X2, 928(AX)
+	MOVOU      X3, 944(AX)
+	MOVOU      X4, 960(AX)
+	MOVOU      X5, 976(AX)
+	MOVOU      X6, 992(AX)
+	MOVOU      X7, 1008(AX)
+	MOVOU      (AX), X0
+	MOVOU      128(AX), X1
+	MOVOU      256(AX), X2
+	MOVOU      384(AX), X3
+	MOVOU      512(AX), X4
+	MOVOU      640(AX), X5
+	MOVOU      768(AX), X6
+	MOVOU      896(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, (AX)
+	MOVOU      X1, 128(AX)
+	MOVOU      X2, 256(AX)
+	MOVOU      X3, 384(AX)
+	MOVOU      X4, 512(AX)
+	MOVOU      X5, 640(AX)
+	MOVOU      X6, 768(AX)
+	MOVOU      X7, 896(AX)
+	MOVOU      16(AX), X0
+	MOVOU      144(AX), X1
+	MOVOU      272(AX), X2
+	MOVOU      400(AX), X3
+	MOVOU      528(AX), X4
+	MOVOU      656(AX), X5
+	MOVOU      784(AX), X6
+	MOVOU      912(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 16(AX)
+	MOVOU      X1, 144(AX)
+	MOVOU      X2, 272(AX)
+	MOVOU      X3, 400(AX)
+	MOVOU      X4, 528(AX)
+	MOVOU      X5, 656(AX)
+	MOVOU      X6, 784(AX)
+	MOVOU      X7, 912(AX)
+	MOVOU      32(AX), X0
+	MOVOU      160(AX), X1
+	MOVOU      288(AX), X2
+	MOVOU      416(AX), X3
+	MOVOU      544(AX), X4
+	MOVOU      672(AX), X5
+	MOVOU      800(AX), X6
+	MOVOU      928(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 32(AX)
+	MOVOU      X1, 160(AX)
+	MOVOU      X2, 288(AX)
+	MOVOU      X3, 416(AX)
+	MOVOU      X4, 544(AX)
+	MOVOU      X5, 672(AX)
+	MOVOU      X6, 800(AX)
+	MOVOU      X7, 928(AX)
+	MOVOU      48(AX), X0
+	MOVOU      176(AX), X1
+	MOVOU      304(AX), X2
+	MOVOU      432(AX), X3
+	MOVOU      560(AX), X4
+	MOVOU      688(AX), X5
+	MOVOU      816(AX), X6
+	MOVOU      944(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 48(AX)
+	MOVOU      X1, 176(AX)
+	MOVOU      X2, 304(AX)
+	MOVOU      X3, 432(AX)
+	MOVOU      X4, 560(AX)
+	MOVOU      X5, 688(AX)
+	MOVOU      X6, 816(AX)
+	MOVOU      X7, 944(AX)
+	MOVOU      64(AX), X0
+	MOVOU      192(AX), X1
+	MOVOU      320(AX), X2
+	MOVOU      448(AX), X3
+	MOVOU      576(AX), X4
+	MOVOU      704(AX), X5
+	MOVOU      832(AX), X6
+	MOVOU      960(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 64(AX)
+	MOVOU      X1, 192(AX)
+	MOVOU      X2, 320(AX)
+	MOVOU      X3, 448(AX)
+	MOVOU      X4, 576(AX)
+	MOVOU      X5, 704(AX)
+	MOVOU      X6, 832(AX)
+	MOVOU      X7, 960(AX)
+	MOVOU      80(AX), X0
+	MOVOU      208(AX), X1
+	MOVOU      336(AX), X2
+	MOVOU      464(AX), X3
+	MOVOU      592(AX), X4
+	MOVOU      720(AX), X5
+	MOVOU      848(AX), X6
+	MOVOU      976(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 80(AX)
+	MOVOU      X1, 208(AX)
+	MOVOU      X2, 336(AX)
+	MOVOU      X3, 464(AX)
+	MOVOU      X4, 592(AX)
+	MOVOU      X5, 720(AX)
+	MOVOU      X6, 848(AX)
+	MOVOU      X7, 976(AX)
+	MOVOU      96(AX), X0
+	MOVOU      224(AX), X1
+	MOVOU      352(AX), X2
+	MOVOU      480(AX), X3
+	MOVOU      608(AX), X4
+	MOVOU      736(AX), X5
+	MOVOU      864(AX), X6
+	MOVOU      992(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 96(AX)
+	MOVOU      X1, 224(AX)
+	MOVOU      X2, 352(AX)
+	MOVOU      X3, 480(AX)
+	MOVOU      X4, 608(AX)
+	MOVOU      X5, 736(AX)
+	MOVOU      X6, 864(AX)
+	MOVOU      X7, 992(AX)
+	MOVOU      112(AX), X0
+	MOVOU      240(AX), X1
+	MOVOU      368(AX), X2
+	MOVOU      496(AX), X3
+	MOVOU      624(AX), X4
+	MOVOU      752(AX), X5
+	MOVOU      880(AX), X6
+	MOVOU      1008(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 112(AX)
+	MOVOU      X1, 240(AX)
+	MOVOU      X2, 368(AX)
+	MOVOU      X3, 496(AX)
+	MOVOU      X4, 624(AX)
+	MOVOU      X5, 752(AX)
+	MOVOU      X6, 880(AX)
+	MOVOU      X7, 1008(AX)
+	RET
+
+DATA ·c40<>+0(SB)/8, $0x0201000706050403
+DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·c40<>(SB), RODATA|NOPTR, $16
+
+DATA ·c48<>+0(SB)/8, $0x0100070605040302
+DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·c48<>(SB), RODATA|NOPTR, $16
+
+// func mixBlocksSSE2(out *block, a *block, b *block, c *block)
+// Requires: SSE2
+TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32
+	MOVQ out+0(FP), DX
+	MOVQ a+8(FP), AX
+	MOVQ b+16(FP), BX
+	MOVQ c+24(FP), CX
+	MOVQ $0x00000080, DI
+
+loop:
+	MOVOU (AX), X0
+	MOVOU (BX), X1
+	MOVOU (CX), X2
+	PXOR  X1, X0
+	PXOR  X2, X0
+	MOVOU X0, (DX)
+	ADDQ  $0x10, AX
+	ADDQ  $0x10, BX
+	ADDQ  $0x10, CX
+	ADDQ  $0x10, DX
+	SUBQ  $0x02, DI
+	JA    loop
+	RET
+
+// func xorBlocksSSE2(out *block, a *block, b *block, c *block)
+// Requires: SSE2
+TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32
+	MOVQ out+0(FP), DX
+	MOVQ a+8(FP), AX
+	MOVQ b+16(FP), BX
+	MOVQ c+24(FP), CX
+	MOVQ $0x00000080, DI
+
+loop:
+	MOVOU (AX), X0
+	MOVOU (BX), X1
+	MOVOU (CX), X2
+	MOVOU (DX), X3
+	PXOR  X1, X0
+	PXOR  X2, X0
+	PXOR  X3, X0
+	MOVOU X0, (DX)
+	ADDQ  $0x10, AX
+	ADDQ  $0x10, BX
+	ADDQ  $0x10, CX
+	ADDQ  $0x10, DX
+	SUBQ  $0x02, DI
+	JA    loop
+	RET
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_generic.go b/vendor/golang.org/x/crypto/argon2/blamka_generic.go
new file mode 100644
index 0000000..a481b22
--- /dev/null
+++ b/vendor/golang.org/x/crypto/argon2/blamka_generic.go
@@ -0,0 +1,163 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package argon2
+
+var useSSE4 bool
+
+func processBlockGeneric(out, in1, in2 *block, xor bool) {
+	var t block
+	for i := range t {
+		t[i] = in1[i] ^ in2[i]
+	}
+	for i := 0; i < blockLength; i += 16 {
+		blamkaGeneric(
+			&t[i+0], &t[i+1], &t[i+2], &t[i+3],
+			&t[i+4], &t[i+5], &t[i+6], &t[i+7],
+			&t[i+8], &t[i+9], &t[i+10], &t[i+11],
+			&t[i+12], &t[i+13], &t[i+14], &t[i+15],
+		)
+	}
+	for i := 0; i < blockLength/8; i += 2 {
+		blamkaGeneric(
+			&t[i], &t[i+1], &t[16+i], &t[16+i+1],
+			&t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1],
+			&t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1],
+			&t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1],
+		)
+	}
+	if xor {
+		for i := range t {
+			out[i] ^= in1[i] ^ in2[i] ^ t[i]
+		}
+	} else {
+		for i := range t {
+			out[i] = in1[i] ^ in2[i] ^ t[i]
+		}
+	}
+}
+
+func blamkaGeneric(t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15 *uint64) {
+	v00, v01, v02, v03 := *t00, *t01, *t02, *t03
+	v04, v05, v06, v07 := *t04, *t05, *t06, *t07
+	v08, v09, v10, v11 := *t08, *t09, *t10, *t11
+	v12, v13, v14, v15 := *t12, *t13, *t14, *t15
+
+	v00 += v04 + 2*uint64(uint32(v00))*uint64(uint32(v04))
+	v12 ^= v00
+	v12 = v12>>32 | v12<<32
+	v08 += v12 + 2*uint64(uint32(v08))*uint64(uint32(v12))
+	v04 ^= v08
+	v04 = v04>>24 | v04<<40
+
+	v00 += v04 + 2*uint64(uint32(v00))*uint64(uint32(v04))
+	v12 ^= v00
+	v12 = v12>>16 | v12<<48
+	v08 += v12 + 2*uint64(uint32(v08))*uint64(uint32(v12))
+	v04 ^= v08
+	v04 = v04>>63 | v04<<1
+
+	v01 += v05 + 2*uint64(uint32(v01))*uint64(uint32(v05))
+	v13 ^= v01
+	v13 = v13>>32 | v13<<32
+	v09 += v13 + 2*uint64(uint32(v09))*uint64(uint32(v13))
+	v05 ^= v09
+	v05 = v05>>24 | v05<<40
+
+	v01 += v05 + 2*uint64(uint32(v01))*uint64(uint32(v05))
+	v13 ^= v01
+	v13 = v13>>16 | v13<<48
+	v09 += v13 + 2*uint64(uint32(v09))*uint64(uint32(v13))
+	v05 ^= v09
+	v05 = v05>>63 | v05<<1
+
+	v02 += v06 + 2*uint64(uint32(v02))*uint64(uint32(v06))
+	v14 ^= v02
+	v14 = v14>>32 | v14<<32
+	v10 += v14 + 2*uint64(uint32(v10))*uint64(uint32(v14))
+	v06 ^= v10
+	v06 = v06>>24 | v06<<40
+
+	v02 += v06 + 2*uint64(uint32(v02))*uint64(uint32(v06))
+	v14 ^= v02
+	v14 = v14>>16 | v14<<48
+	v10 += v14 + 2*uint64(uint32(v10))*uint64(uint32(v14))
+	v06 ^= v10
+	v06 = v06>>63 | v06<<1
+
+	v03 += v07 + 2*uint64(uint32(v03))*uint64(uint32(v07))
+	v15 ^= v03
+	v15 = v15>>32 | v15<<32
+	v11 += v15 + 2*uint64(uint32(v11))*uint64(uint32(v15))
+	v07 ^= v11
+	v07 = v07>>24 | v07<<40
+
+	v03 += v07 + 2*uint64(uint32(v03))*uint64(uint32(v07))
+	v15 ^= v03
+	v15 = v15>>16 | v15<<48
+	v11 += v15 + 2*uint64(uint32(v11))*uint64(uint32(v15))
+	v07 ^= v11
+	v07 = v07>>63 | v07<<1
+
+	v00 += v05 + 2*uint64(uint32(v00))*uint64(uint32(v05))
+	v15 ^= v00
+	v15 = v15>>32 | v15<<32
+	v10 += v15 + 2*uint64(uint32(v10))*uint64(uint32(v15))
+	v05 ^= v10
+	v05 = v05>>24 | v05<<40
+
+	v00 += v05 + 2*uint64(uint32(v00))*uint64(uint32(v05))
+	v15 ^= v00
+	v15 = v15>>16 | v15<<48
+	v10 += v15 + 2*uint64(uint32(v10))*uint64(uint32(v15))
+	v05 ^= v10
+	v05 = v05>>63 | v05<<1
+
+	v01 += v06 + 2*uint64(uint32(v01))*uint64(uint32(v06))
+	v12 ^= v01
+	v12 = v12>>32 | v12<<32
+	v11 += v12 + 2*uint64(uint32(v11))*uint64(uint32(v12))
+	v06 ^= v11
+	v06 = v06>>24 | v06<<40
+
+	v01 += v06 + 2*uint64(uint32(v01))*uint64(uint32(v06))
+	v12 ^= v01
+	v12 = v12>>16 | v12<<48
+	v11 += v12 + 2*uint64(uint32(v11))*uint64(uint32(v12))
+	v06 ^= v11
+	v06 = v06>>63 | v06<<1
+
+	v02 += v07 + 2*uint64(uint32(v02))*uint64(uint32(v07))
+	v13 ^= v02
+	v13 = v13>>32 | v13<<32
+	v08 += v13 + 2*uint64(uint32(v08))*uint64(uint32(v13))
+	v07 ^= v08
+	v07 = v07>>24 | v07<<40
+
+	v02 += v07 + 2*uint64(uint32(v02))*uint64(uint32(v07))
+	v13 ^= v02
+	v13 = v13>>16 | v13<<48
+	v08 += v13 + 2*uint64(uint32(v08))*uint64(uint32(v13))
+	v07 ^= v08
+	v07 = v07>>63 | v07<<1
+
+	v03 += v04 + 2*uint64(uint32(v03))*uint64(uint32(v04))
+	v14 ^= v03
+	v14 = v14>>32 | v14<<32
+	v09 += v14 + 2*uint64(uint32(v09))*uint64(uint32(v14))
+	v04 ^= v09
+	v04 = v04>>24 | v04<<40
+
+	v03 += v04 + 2*uint64(uint32(v03))*uint64(uint32(v04))
+	v14 ^= v03
+	v14 = v14>>16 | v14<<48
+	v09 += v14 + 2*uint64(uint32(v09))*uint64(uint32(v14))
+	v04 ^= v09
+	v04 = v04>>63 | v04<<1
+
+	*t00, *t01, *t02, *t03 = v00, v01, v02, v03
+	*t04, *t05, *t06, *t07 = v04, v05, v06, v07
+	*t08, *t09, *t10, *t11 = v08, v09, v10, v11
+	*t12, *t13, *t14, *t15 = v12, v13, v14, v15
+}
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_ref.go b/vendor/golang.org/x/crypto/argon2/blamka_ref.go
new file mode 100644
index 0000000..16d58c6
--- /dev/null
+++ b/vendor/golang.org/x/crypto/argon2/blamka_ref.go
@@ -0,0 +1,15 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 || purego || !gc
+
+package argon2
+
+func processBlock(out, in1, in2 *block) {
+	processBlockGeneric(out, in1, in2, false)
+}
+
+func processBlockXOR(out, in1, in2 *block) {
+	processBlockGeneric(out, in1, in2, true)
+}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b.go b/vendor/golang.org/x/crypto/blake2b/blake2b.go
new file mode 100644
index 0000000..d2e98d4
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2b/blake2b.go
@@ -0,0 +1,291 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package blake2b implements the BLAKE2b hash algorithm defined by RFC 7693
+// and the extendable output function (XOF) BLAKE2Xb.
+//
+// BLAKE2b is optimized for 64-bit platforms—including NEON-enabled ARMs—and
+// produces digests of any size between 1 and 64 bytes.
+// For a detailed specification of BLAKE2b see https://blake2.net/blake2.pdf
+// and for BLAKE2Xb see https://blake2.net/blake2x.pdf
+//
+// If you aren't sure which function you need, use BLAKE2b (Sum512 or New512).
+// If you need a secret-key MAC (message authentication code), use the New512
+// function with a non-nil key.
+//
+// BLAKE2X is a construction to compute hash values larger than 64 bytes. It
+// can produce hash values between 0 and 4 GiB.
+package blake2b
+
+import (
+	"encoding/binary"
+	"errors"
+	"hash"
+)
+
+const (
+	// The blocksize of BLAKE2b in bytes.
+	BlockSize = 128
+	// The hash size of BLAKE2b-512 in bytes.
+	Size = 64
+	// The hash size of BLAKE2b-384 in bytes.
+	Size384 = 48
+	// The hash size of BLAKE2b-256 in bytes.
+	Size256 = 32
+)
+
+var (
+	useAVX2 bool
+	useAVX  bool
+	useSSE4 bool
+)
+
+var (
+	errKeySize  = errors.New("blake2b: invalid key size")
+	errHashSize = errors.New("blake2b: invalid hash size")
+)
+
+var iv = [8]uint64{
+	0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
+	0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
+}
+
+// Sum512 returns the BLAKE2b-512 checksum of the data.
+func Sum512(data []byte) [Size]byte {
+	var sum [Size]byte
+	checkSum(&sum, Size, data)
+	return sum
+}
+
+// Sum384 returns the BLAKE2b-384 checksum of the data.
+func Sum384(data []byte) [Size384]byte {
+	var sum [Size]byte
+	var sum384 [Size384]byte
+	checkSum(&sum, Size384, data)
+	copy(sum384[:], sum[:Size384])
+	return sum384
+}
+
+// Sum256 returns the BLAKE2b-256 checksum of the data.
+func Sum256(data []byte) [Size256]byte {
+	var sum [Size]byte
+	var sum256 [Size256]byte
+	checkSum(&sum, Size256, data)
+	copy(sum256[:], sum[:Size256])
+	return sum256
+}
+
+// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil
+// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
+func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) }
+
+// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil
+// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
+func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) }
+
+// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil
+// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
+func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) }
+
+// New returns a new hash.Hash computing the BLAKE2b checksum with a custom length.
+// A non-nil key turns the hash into a MAC. The key must be between zero and 64 bytes long.
+// The hash size can be a value between 1 and 64 but it is highly recommended to use
+// values equal or greater than:
+// - 32 if BLAKE2b is used as a hash function (The key is zero bytes long).
+// - 16 if BLAKE2b is used as a MAC function (The key is at least 16 bytes long).
+// When the key is nil, the returned hash.Hash implements BinaryMarshaler
+// and BinaryUnmarshaler for state (de)serialization as documented by hash.Hash.
+func New(size int, key []byte) (hash.Hash, error) { return newDigest(size, key) }
+
+func newDigest(hashSize int, key []byte) (*digest, error) {
+	if hashSize < 1 || hashSize > Size {
+		return nil, errHashSize
+	}
+	if len(key) > Size {
+		return nil, errKeySize
+	}
+	d := &digest{
+		size:   hashSize,
+		keyLen: len(key),
+	}
+	copy(d.key[:], key)
+	d.Reset()
+	return d, nil
+}
+
+func checkSum(sum *[Size]byte, hashSize int, data []byte) {
+	h := iv
+	h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24)
+	var c [2]uint64
+
+	if length := len(data); length > BlockSize {
+		n := length &^ (BlockSize - 1)
+		if length == n {
+			n -= BlockSize
+		}
+		hashBlocks(&h, &c, 0, data[:n])
+		data = data[n:]
+	}
+
+	var block [BlockSize]byte
+	offset := copy(block[:], data)
+	remaining := uint64(BlockSize - offset)
+	if c[0] < remaining {
+		c[1]--
+	}
+	c[0] -= remaining
+
+	hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
+
+	for i, v := range h[:(hashSize+7)/8] {
+		binary.LittleEndian.PutUint64(sum[8*i:], v)
+	}
+}
+
+type digest struct {
+	h      [8]uint64
+	c      [2]uint64
+	size   int
+	block  [BlockSize]byte
+	offset int
+
+	key    [BlockSize]byte
+	keyLen int
+}
+
+const (
+	magic         = "b2b"
+	marshaledSize = len(magic) + 8*8 + 2*8 + 1 + BlockSize + 1
+)
+
+func (d *digest) MarshalBinary() ([]byte, error) {
+	if d.keyLen != 0 {
+		return nil, errors.New("crypto/blake2b: cannot marshal MACs")
+	}
+	b := make([]byte, 0, marshaledSize)
+	b = append(b, magic...)
+	for i := 0; i < 8; i++ {
+		b = appendUint64(b, d.h[i])
+	}
+	b = appendUint64(b, d.c[0])
+	b = appendUint64(b, d.c[1])
+	// Maximum value for size is 64
+	b = append(b, byte(d.size))
+	b = append(b, d.block[:]...)
+	b = append(b, byte(d.offset))
+	return b, nil
+}
+
+func (d *digest) UnmarshalBinary(b []byte) error {
+	if len(b) < len(magic) || string(b[:len(magic)]) != magic {
+		return errors.New("crypto/blake2b: invalid hash state identifier")
+	}
+	if len(b) != marshaledSize {
+		return errors.New("crypto/blake2b: invalid hash state size")
+	}
+	b = b[len(magic):]
+	for i := 0; i < 8; i++ {
+		b, d.h[i] = consumeUint64(b)
+	}
+	b, d.c[0] = consumeUint64(b)
+	b, d.c[1] = consumeUint64(b)
+	d.size = int(b[0])
+	b = b[1:]
+	copy(d.block[:], b[:BlockSize])
+	b = b[BlockSize:]
+	d.offset = int(b[0])
+	return nil
+}
+
+func (d *digest) BlockSize() int { return BlockSize }
+
+func (d *digest) Size() int { return d.size }
+
+func (d *digest) Reset() {
+	d.h = iv
+	d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24)
+	d.offset, d.c[0], d.c[1] = 0, 0, 0
+	if d.keyLen > 0 {
+		d.block = d.key
+		d.offset = BlockSize
+	}
+}
+
+func (d *digest) Write(p []byte) (n int, err error) {
+	n = len(p)
+
+	if d.offset > 0 {
+		remaining := BlockSize - d.offset
+		if n <= remaining {
+			d.offset += copy(d.block[d.offset:], p)
+			return
+		}
+		copy(d.block[d.offset:], p[:remaining])
+		hashBlocks(&d.h, &d.c, 0, d.block[:])
+		d.offset = 0
+		p = p[remaining:]
+	}
+
+	if length := len(p); length > BlockSize {
+		nn := length &^ (BlockSize - 1)
+		if length == nn {
+			nn -= BlockSize
+		}
+		hashBlocks(&d.h, &d.c, 0, p[:nn])
+		p = p[nn:]
+	}
+
+	if len(p) > 0 {
+		d.offset += copy(d.block[:], p)
+	}
+
+	return
+}
+
+func (d *digest) Sum(sum []byte) []byte {
+	var hash [Size]byte
+	d.finalize(&hash)
+	return append(sum, hash[:d.size]...)
+}
+
+func (d *digest) finalize(hash *[Size]byte) {
+	var block [BlockSize]byte
+	copy(block[:], d.block[:d.offset])
+	remaining := uint64(BlockSize - d.offset)
+
+	c := d.c
+	if c[0] < remaining {
+		c[1]--
+	}
+	c[0] -= remaining
+
+	h := d.h
+	hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
+
+	for i, v := range h {
+		binary.LittleEndian.PutUint64(hash[8*i:], v)
+	}
+}
+
+func appendUint64(b []byte, x uint64) []byte {
+	var a [8]byte
+	binary.BigEndian.PutUint64(a[:], x)
+	return append(b, a[:]...)
+}
+
+func appendUint32(b []byte, x uint32) []byte {
+	var a [4]byte
+	binary.BigEndian.PutUint32(a[:], x)
+	return append(b, a[:]...)
+}
+
+func consumeUint64(b []byte) ([]byte, uint64) {
+	x := binary.BigEndian.Uint64(b)
+	return b[8:], x
+}
+
+func consumeUint32(b []byte) ([]byte, uint32) {
+	x := binary.BigEndian.Uint32(b)
+	return b[4:], x
+}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go
new file mode 100644
index 0000000..199c21d
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go
@@ -0,0 +1,37 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 && gc && !purego
+
+package blake2b
+
+import "golang.org/x/sys/cpu"
+
+func init() {
+	useAVX2 = cpu.X86.HasAVX2
+	useAVX = cpu.X86.HasAVX
+	useSSE4 = cpu.X86.HasSSE41
+}
+
+//go:noescape
+func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
+
+//go:noescape
+func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
+
+//go:noescape
+func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
+
+func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
+	switch {
+	case useAVX2:
+		hashBlocksAVX2(h, c, flag, blocks)
+	case useAVX:
+		hashBlocksAVX(h, c, flag, blocks)
+	case useSSE4:
+		hashBlocksSSE4(h, c, flag, blocks)
+	default:
+		hashBlocksGeneric(h, c, flag, blocks)
+	}
+}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
new file mode 100644
index 0000000..f75162e
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
@@ -0,0 +1,4559 @@
+// Code generated by command: go run blake2bAVX2_amd64_asm.go -out ../../blake2bAVX2_amd64.s -pkg blake2b. DO NOT EDIT.
+
+//go:build amd64 && gc && !purego
+
+#include "textflag.h"
+
+// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
+// Requires: AVX, AVX2
+TEXT ·hashBlocksAVX2(SB), NOSPLIT, $320-48
+	MOVQ    h+0(FP), AX
+	MOVQ    c+8(FP), BX
+	MOVQ    flag+16(FP), CX
+	MOVQ    blocks_base+24(FP), SI
+	MOVQ    blocks_len+32(FP), DI
+	MOVQ    SP, DX
+	ADDQ    $+31, DX
+	ANDQ    $-32, DX
+	MOVQ    CX, 16(DX)
+	XORQ    CX, CX
+	MOVQ    CX, 24(DX)
+	VMOVDQU ·AVX2_c40<>+0(SB), Y4
+	VMOVDQU ·AVX2_c48<>+0(SB), Y5
+	VMOVDQU (AX), Y8
+	VMOVDQU 32(AX), Y9
+	VMOVDQU ·AVX2_iv0<>+0(SB), Y6
+	VMOVDQU ·AVX2_iv1<>+0(SB), Y7
+	MOVQ    (BX), R8
+	MOVQ    8(BX), R9
+	MOVQ    R9, 8(DX)
+
+loop:
+	ADDQ $0x80, R8
+	MOVQ R8, (DX)
+	CMPQ R8, $0x80
+	JGE  noinc
+	INCQ R9
+	MOVQ R9, 8(DX)
+
+noinc:
+	VMOVDQA     Y8, Y0
+	VMOVDQA     Y9, Y1
+	VMOVDQA     Y6, Y2
+	VPXOR       (DX), Y7, Y3
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x26
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x10
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x30
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x08
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x28
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x18
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x38
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x40
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x60
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x50
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x70
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x48
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x58
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VMOVDQA     Y12, 32(DX)
+	VMOVDQA     Y13, 64(DX)
+	VMOVDQA     Y14, 96(DX)
+	VMOVDQA     Y15, 128(DX)
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x70
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x48
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x20
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x50
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x40
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x30
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x58
+	VPSHUFD     $0x4e, (SI), X14
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x28
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x60
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x38
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x10
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VMOVDQA     Y12, 160(DX)
+	VMOVDQA     Y13, 192(DX)
+	VMOVDQA     Y14, 224(DX)
+	VMOVDQA     Y15, 256(DX)
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x28
+	VMOVDQU     88(SI), X12
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x40
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x10
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x2e
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x50
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x38
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x18
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x48
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x70
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x08
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x30
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x38
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x18
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x58
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x48
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x60
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x08
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x70
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x10
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x28
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x30
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x1e
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x50
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x48
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x10
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x28
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x50
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x2e
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x38
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x70
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x30
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x58
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x08
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x60
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x10
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x1e
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x30
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x60
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x58
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x50
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x20
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x38
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x08
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x68
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x70
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x28
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x48
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x60
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x70
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x08
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x28
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x78
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x50
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x36
+	VPSHUFD     $0x4e, 64(SI), X11
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x30
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x38
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x10
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x18
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x58
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x68
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x60
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x38
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x58
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x08
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x70
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x48
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x28
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x78
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x10
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x3e
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x30
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x20
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x50
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x30
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x58
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x70
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x1e
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x78
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x48
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x08
+	VMOVDQU     96(SI), X14
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x50
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x10
+	VMOVDQU     32(SI), X11
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x38
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x50
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x38
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x40
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x08
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x10
+	VPSHUFD     $0x4e, 40(SI), X11
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x20
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x78
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x48
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x58
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x60
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x70
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x1e
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	VPADDQ      32(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      64(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      96(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      128(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	VPADDQ      160(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      192(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      224(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      256(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	VPXOR       Y0, Y8, Y8
+	VPXOR       Y1, Y9, Y9
+	VPXOR       Y2, Y8, Y8
+	VPXOR       Y3, Y9, Y9
+	LEAQ        128(SI), SI
+	SUBQ        $0x80, DI
+	JNE         loop
+	MOVQ        R8, (BX)
+	MOVQ        R9, 8(BX)
+	VMOVDQU     Y8, (AX)
+	VMOVDQU     Y9, 32(AX)
+	VZEROUPPER
+	RET
+
+DATA ·AVX2_c40<>+0(SB)/8, $0x0201000706050403
+DATA ·AVX2_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+DATA ·AVX2_c40<>+16(SB)/8, $0x0201000706050403
+DATA ·AVX2_c40<>+24(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·AVX2_c40<>(SB), RODATA|NOPTR, $32
+
+DATA ·AVX2_c48<>+0(SB)/8, $0x0100070605040302
+DATA ·AVX2_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+DATA ·AVX2_c48<>+16(SB)/8, $0x0100070605040302
+DATA ·AVX2_c48<>+24(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·AVX2_c48<>(SB), RODATA|NOPTR, $32
+
+DATA ·AVX2_iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·AVX2_iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+DATA ·AVX2_iv0<>+16(SB)/8, $0x3c6ef372fe94f82b
+DATA ·AVX2_iv0<>+24(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·AVX2_iv0<>(SB), RODATA|NOPTR, $32
+
+DATA ·AVX2_iv1<>+0(SB)/8, $0x510e527fade682d1
+DATA ·AVX2_iv1<>+8(SB)/8, $0x9b05688c2b3e6c1f
+DATA ·AVX2_iv1<>+16(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·AVX2_iv1<>+24(SB)/8, $0x5be0cd19137e2179
+GLOBL ·AVX2_iv1<>(SB), RODATA|NOPTR, $32
+
+// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
+// Requires: AVX, SSE2
+TEXT ·hashBlocksAVX(SB), NOSPLIT, $288-48
+	MOVQ    h+0(FP), AX
+	MOVQ    c+8(FP), BX
+	MOVQ    flag+16(FP), CX
+	MOVQ    blocks_base+24(FP), SI
+	MOVQ    blocks_len+32(FP), DI
+	MOVQ    SP, R10
+	ADDQ    $0x0f, R10
+	ANDQ    $-16, R10
+	VMOVDQU ·AVX_c40<>+0(SB), X0
+	VMOVDQU ·AVX_c48<>+0(SB), X1
+	VMOVDQA X0, X8
+	VMOVDQA X1, X9
+	VMOVDQU ·AVX_iv3<>+0(SB), X0
+	VMOVDQA X0, (R10)
+	XORQ    CX, (R10)
+	VMOVDQU (AX), X10
+	VMOVDQU 16(AX), X11
+	VMOVDQU 32(AX), X2
+	VMOVDQU 48(AX), X3
+	MOVQ    (BX), R8
+	MOVQ    8(BX), R9
+
+loop:
+	ADDQ $0x80, R8
+	CMPQ R8, $0x80
+	JGE  noinc
+	INCQ R9
+
+noinc:
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0xf9
+	BYTE    $0x6e
+	BYTE    $0xf8
+	BYTE    $0xc4
+	BYTE    $0x43
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0xf9
+	BYTE    $0x01
+	VMOVDQA X10, X0
+	VMOVDQA X11, X1
+	VMOVDQU ·AVX_iv0<>+0(SB), X4
+	VMOVDQU ·AVX_iv1<>+0(SB), X5
+	VMOVDQU ·AVX_iv2<>+0(SB), X6
+	VPXOR   X15, X6, X6
+	VMOVDQA (R10), X7
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x26
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x20
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x08
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x28
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x10
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x30
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x38
+	BYTE    $0x01
+	VMOVDQA X12, 16(R10)
+	VMOVDQA X13, 32(R10)
+	VMOVDQA X14, 48(R10)
+	VMOVDQA X15, 64(R10)
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x40
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x48
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x68
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x70
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x58
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x78
+	BYTE    $0x01
+	VMOVDQA X12, 80(R10)
+	VMOVDQA X13, 96(R10)
+	VMOVDQA X14, 112(R10)
+	VMOVDQA X15, 128(R10)
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x70
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x48
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x50
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x78
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x20
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x68
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x40
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x30
+	BYTE    $0x01
+	VMOVDQA X12, 144(R10)
+	VMOVDQA X13, 160(R10)
+	VMOVDQA X14, 176(R10)
+	VMOVDQA X15, 192(R10)
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	VPSHUFD $0x4e, (SI), X12
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x58
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x38
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x28
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x10
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x18
+	BYTE    $0x01
+	VMOVDQA X12, 208(R10)
+	VMOVDQA X13, 224(R10)
+	VMOVDQA X14, 240(R10)
+	VMOVDQA X15, 256(R10)
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	VMOVDQU 88(SI), X12
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x28
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x40
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x10
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x78
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x36
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x68
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x50
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x38
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x70
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x08
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x48
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x30
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x20
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x38
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x68
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x48
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x60
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x58
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x08
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x70
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x10
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x20
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x30
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x3e
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x28
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x78
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x40
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x48
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x10
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x36
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x20
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x28
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x38
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x78
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x70
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x30
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x08
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x40
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x58
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x60
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x68
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x10
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x2e
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x58
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x30
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x40
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x18
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x20
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x78
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x68
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x70
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x38
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x08
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x28
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x48
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x70
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x28
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x68
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x08
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x20
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x78
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x50
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	MOVQ    (SI), X12
+	VPSHUFD $0x4e, 64(SI), X13
+	MOVQ    56(SI), X14
+	MOVQ    16(SI), X15
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x30
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x58
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x68
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x58
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x08
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x38
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x70
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x48
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	MOVQ    40(SI), X12
+	MOVQ    64(SI), X13
+	MOVQ    (SI), X14
+	MOVQ    48(SI), X15
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x78
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x10
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x20
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x50
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	MOVQ    48(SI), X12
+	MOVQ    88(SI), X13
+	MOVQ    120(SI), X14
+	MOVQ    24(SI), X15
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x70
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x2e
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x48
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x40
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	VMOVDQU 96(SI), X12
+	MOVQ    8(SI), X13
+	MOVQ    16(SI), X14
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x38
+	BYTE    $0x01
+	VMOVDQU 32(SI), X15
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x50
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x38
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x10
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x30
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x40
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x08
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x20
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x28
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	MOVQ    120(SI), X12
+	MOVQ    24(SI), X13
+	MOVQ    88(SI), X14
+	MOVQ    96(SI), X15
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x48
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x68
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x70
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x3e
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	VPADDQ  16(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  32(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  48(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  64(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	VPADDQ  80(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  96(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  112(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  128(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	VPADDQ  144(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  160(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  176(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  192(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	VPADDQ  208(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  224(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  240(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  256(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	VMOVDQU 32(AX), X14
+	VMOVDQU 48(AX), X15
+	VPXOR   X0, X10, X10
+	VPXOR   X1, X11, X11
+	VPXOR   X2, X14, X14
+	VPXOR   X3, X15, X15
+	VPXOR   X4, X10, X10
+	VPXOR   X5, X11, X11
+	VPXOR   X6, X14, X2
+	VPXOR   X7, X15, X3
+	VMOVDQU X2, 32(AX)
+	VMOVDQU X3, 48(AX)
+	LEAQ    128(SI), SI
+	SUBQ    $0x80, DI
+	JNE     loop
+	VMOVDQU X10, (AX)
+	VMOVDQU X11, 16(AX)
+	MOVQ    R8, (BX)
+	MOVQ    R9, 8(BX)
+	VZEROUPPER
+	RET
+
+DATA ·AVX_c40<>+0(SB)/8, $0x0201000706050403
+DATA ·AVX_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·AVX_c40<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_c48<>+0(SB)/8, $0x0100070605040302
+DATA ·AVX_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·AVX_c48<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·AVX_iv3<>+8(SB)/8, $0x5be0cd19137e2179
+GLOBL ·AVX_iv3<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·AVX_iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+GLOBL ·AVX_iv0<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv1<>+0(SB)/8, $0x3c6ef372fe94f82b
+DATA ·AVX_iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·AVX_iv1<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv2<>+0(SB)/8, $0x510e527fade682d1
+DATA ·AVX_iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f
+GLOBL ·AVX_iv2<>(SB), RODATA|NOPTR, $16
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
new file mode 100644
index 0000000..9a0ce21
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
@@ -0,0 +1,1441 @@
+// Code generated by command: go run blake2b_amd64_asm.go -out ../../blake2b_amd64.s -pkg blake2b. DO NOT EDIT.
+
+//go:build amd64 && gc && !purego
+
+#include "textflag.h"
+
+// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
+// Requires: SSE2, SSE4.1, SSSE3
+TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48
+	MOVQ  h+0(FP), AX
+	MOVQ  c+8(FP), BX
+	MOVQ  flag+16(FP), CX
+	MOVQ  blocks_base+24(FP), SI
+	MOVQ  blocks_len+32(FP), DI
+	MOVQ  SP, R10
+	ADDQ  $0x0f, R10
+	ANDQ  $-16, R10
+	MOVOU ·iv3<>+0(SB), X0
+	MOVO  X0, (R10)
+	XORQ  CX, (R10)
+	MOVOU ·c40<>+0(SB), X13
+	MOVOU ·c48<>+0(SB), X14
+	MOVOU (AX), X12
+	MOVOU 16(AX), X15
+	MOVQ  (BX), R8
+	MOVQ  8(BX), R9
+
+loop:
+	ADDQ $0x80, R8
+	CMPQ R8, $0x80
+	JGE  noinc
+	INCQ R9
+
+noinc:
+	MOVQ       R8, X8
+	PINSRQ     $0x01, R9, X8
+	MOVO       X12, X0
+	MOVO       X15, X1
+	MOVOU      32(AX), X2
+	MOVOU      48(AX), X3
+	MOVOU      ·iv0<>+0(SB), X4
+	MOVOU      ·iv1<>+0(SB), X5
+	MOVOU      ·iv2<>+0(SB), X6
+	PXOR       X8, X6
+	MOVO       (R10), X7
+	MOVQ       (SI), X8
+	PINSRQ     $0x01, 16(SI), X8
+	MOVQ       32(SI), X9
+	PINSRQ     $0x01, 48(SI), X9
+	MOVQ       8(SI), X10
+	PINSRQ     $0x01, 24(SI), X10
+	MOVQ       40(SI), X11
+	PINSRQ     $0x01, 56(SI), X11
+	MOVO       X8, 16(R10)
+	MOVO       X9, 32(R10)
+	MOVO       X10, 48(R10)
+	MOVO       X11, 64(R10)
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       64(SI), X8
+	PINSRQ     $0x01, 80(SI), X8
+	MOVQ       96(SI), X9
+	PINSRQ     $0x01, 112(SI), X9
+	MOVQ       72(SI), X10
+	PINSRQ     $0x01, 88(SI), X10
+	MOVQ       104(SI), X11
+	PINSRQ     $0x01, 120(SI), X11
+	MOVO       X8, 80(R10)
+	MOVO       X9, 96(R10)
+	MOVO       X10, 112(R10)
+	MOVO       X11, 128(R10)
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       112(SI), X8
+	PINSRQ     $0x01, 32(SI), X8
+	MOVQ       72(SI), X9
+	PINSRQ     $0x01, 104(SI), X9
+	MOVQ       80(SI), X10
+	PINSRQ     $0x01, 64(SI), X10
+	MOVQ       120(SI), X11
+	PINSRQ     $0x01, 48(SI), X11
+	MOVO       X8, 144(R10)
+	MOVO       X9, 160(R10)
+	MOVO       X10, 176(R10)
+	MOVO       X11, 192(R10)
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       8(SI), X8
+	PINSRQ     $0x01, (SI), X8
+	MOVQ       88(SI), X9
+	PINSRQ     $0x01, 40(SI), X9
+	MOVQ       96(SI), X10
+	PINSRQ     $0x01, 16(SI), X10
+	MOVQ       56(SI), X11
+	PINSRQ     $0x01, 24(SI), X11
+	MOVO       X8, 208(R10)
+	MOVO       X9, 224(R10)
+	MOVO       X10, 240(R10)
+	MOVO       X11, 256(R10)
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       88(SI), X8
+	PINSRQ     $0x01, 96(SI), X8
+	MOVQ       40(SI), X9
+	PINSRQ     $0x01, 120(SI), X9
+	MOVQ       64(SI), X10
+	PINSRQ     $0x01, (SI), X10
+	MOVQ       16(SI), X11
+	PINSRQ     $0x01, 104(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       80(SI), X8
+	PINSRQ     $0x01, 24(SI), X8
+	MOVQ       56(SI), X9
+	PINSRQ     $0x01, 72(SI), X9
+	MOVQ       112(SI), X10
+	PINSRQ     $0x01, 48(SI), X10
+	MOVQ       8(SI), X11
+	PINSRQ     $0x01, 32(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       56(SI), X8
+	PINSRQ     $0x01, 24(SI), X8
+	MOVQ       104(SI), X9
+	PINSRQ     $0x01, 88(SI), X9
+	MOVQ       72(SI), X10
+	PINSRQ     $0x01, 8(SI), X10
+	MOVQ       96(SI), X11
+	PINSRQ     $0x01, 112(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       16(SI), X8
+	PINSRQ     $0x01, 40(SI), X8
+	MOVQ       32(SI), X9
+	PINSRQ     $0x01, 120(SI), X9
+	MOVQ       48(SI), X10
+	PINSRQ     $0x01, 80(SI), X10
+	MOVQ       (SI), X11
+	PINSRQ     $0x01, 64(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       72(SI), X8
+	PINSRQ     $0x01, 40(SI), X8
+	MOVQ       16(SI), X9
+	PINSRQ     $0x01, 80(SI), X9
+	MOVQ       (SI), X10
+	PINSRQ     $0x01, 56(SI), X10
+	MOVQ       32(SI), X11
+	PINSRQ     $0x01, 120(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       112(SI), X8
+	PINSRQ     $0x01, 88(SI), X8
+	MOVQ       48(SI), X9
+	PINSRQ     $0x01, 24(SI), X9
+	MOVQ       8(SI), X10
+	PINSRQ     $0x01, 96(SI), X10
+	MOVQ       64(SI), X11
+	PINSRQ     $0x01, 104(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       16(SI), X8
+	PINSRQ     $0x01, 48(SI), X8
+	MOVQ       (SI), X9
+	PINSRQ     $0x01, 64(SI), X9
+	MOVQ       96(SI), X10
+	PINSRQ     $0x01, 80(SI), X10
+	MOVQ       88(SI), X11
+	PINSRQ     $0x01, 24(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       32(SI), X8
+	PINSRQ     $0x01, 56(SI), X8
+	MOVQ       120(SI), X9
+	PINSRQ     $0x01, 8(SI), X9
+	MOVQ       104(SI), X10
+	PINSRQ     $0x01, 40(SI), X10
+	MOVQ       112(SI), X11
+	PINSRQ     $0x01, 72(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       96(SI), X8
+	PINSRQ     $0x01, 8(SI), X8
+	MOVQ       112(SI), X9
+	PINSRQ     $0x01, 32(SI), X9
+	MOVQ       40(SI), X10
+	PINSRQ     $0x01, 120(SI), X10
+	MOVQ       104(SI), X11
+	PINSRQ     $0x01, 80(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       (SI), X8
+	PINSRQ     $0x01, 48(SI), X8
+	MOVQ       72(SI), X9
+	PINSRQ     $0x01, 64(SI), X9
+	MOVQ       56(SI), X10
+	PINSRQ     $0x01, 24(SI), X10
+	MOVQ       16(SI), X11
+	PINSRQ     $0x01, 88(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       104(SI), X8
+	PINSRQ     $0x01, 56(SI), X8
+	MOVQ       96(SI), X9
+	PINSRQ     $0x01, 24(SI), X9
+	MOVQ       88(SI), X10
+	PINSRQ     $0x01, 112(SI), X10
+	MOVQ       8(SI), X11
+	PINSRQ     $0x01, 72(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       40(SI), X8
+	PINSRQ     $0x01, 120(SI), X8
+	MOVQ       64(SI), X9
+	PINSRQ     $0x01, 16(SI), X9
+	MOVQ       (SI), X10
+	PINSRQ     $0x01, 32(SI), X10
+	MOVQ       48(SI), X11
+	PINSRQ     $0x01, 80(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       48(SI), X8
+	PINSRQ     $0x01, 112(SI), X8
+	MOVQ       88(SI), X9
+	PINSRQ     $0x01, (SI), X9
+	MOVQ       120(SI), X10
+	PINSRQ     $0x01, 72(SI), X10
+	MOVQ       24(SI), X11
+	PINSRQ     $0x01, 64(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       96(SI), X8
+	PINSRQ     $0x01, 104(SI), X8
+	MOVQ       8(SI), X9
+	PINSRQ     $0x01, 80(SI), X9
+	MOVQ       16(SI), X10
+	PINSRQ     $0x01, 56(SI), X10
+	MOVQ       32(SI), X11
+	PINSRQ     $0x01, 40(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       80(SI), X8
+	PINSRQ     $0x01, 64(SI), X8
+	MOVQ       56(SI), X9
+	PINSRQ     $0x01, 8(SI), X9
+	MOVQ       16(SI), X10
+	PINSRQ     $0x01, 32(SI), X10
+	MOVQ       48(SI), X11
+	PINSRQ     $0x01, 40(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       120(SI), X8
+	PINSRQ     $0x01, 72(SI), X8
+	MOVQ       24(SI), X9
+	PINSRQ     $0x01, 104(SI), X9
+	MOVQ       88(SI), X10
+	PINSRQ     $0x01, 112(SI), X10
+	MOVQ       96(SI), X11
+	PINSRQ     $0x01, (SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	PADDQ      16(R10), X0
+	PADDQ      32(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      48(R10), X0
+	PADDQ      64(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	PADDQ      80(R10), X0
+	PADDQ      96(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      112(R10), X0
+	PADDQ      128(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	PADDQ      144(R10), X0
+	PADDQ      160(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      176(R10), X0
+	PADDQ      192(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	PADDQ      208(R10), X0
+	PADDQ      224(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      240(R10), X0
+	PADDQ      256(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      32(AX), X10
+	MOVOU      48(AX), X11
+	PXOR       X0, X12
+	PXOR       X1, X15
+	PXOR       X2, X10
+	PXOR       X3, X11
+	PXOR       X4, X12
+	PXOR       X5, X15
+	PXOR       X6, X10
+	PXOR       X7, X11
+	MOVOU      X10, 32(AX)
+	MOVOU      X11, 48(AX)
+	LEAQ       128(SI), SI
+	SUBQ       $0x80, DI
+	JNE        loop
+	MOVOU      X12, (AX)
+	MOVOU      X15, 16(AX)
+	MOVQ       R8, (BX)
+	MOVQ       R9, 8(BX)
+	RET
+
+DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179
+GLOBL ·iv3<>(SB), RODATA|NOPTR, $16
+
+DATA ·c40<>+0(SB)/8, $0x0201000706050403
+DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·c40<>(SB), RODATA|NOPTR, $16
+
+DATA ·c48<>+0(SB)/8, $0x0100070605040302
+DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·c48<>(SB), RODATA|NOPTR, $16
+
+DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+GLOBL ·iv0<>(SB), RODATA|NOPTR, $16
+
+DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b
+DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·iv1<>(SB), RODATA|NOPTR, $16
+
+DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1
+DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f
+GLOBL ·iv2<>(SB), RODATA|NOPTR, $16
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go
new file mode 100644
index 0000000..3168a8a
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go
@@ -0,0 +1,182 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package blake2b
+
+import (
+	"encoding/binary"
+	"math/bits"
+)
+
+// the precomputed values for BLAKE2b
+// there are 12 16-byte arrays - one for each round
+// the entries are calculated from the sigma constants.
+var precomputed = [12][16]byte{
+	{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
+	{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
+	{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
+	{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
+	{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
+	{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
+	{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
+	{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
+	{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
+	{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
+	{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first
+	{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second
+}
+
+func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
+	var m [16]uint64
+	c0, c1 := c[0], c[1]
+
+	for i := 0; i < len(blocks); {
+		c0 += BlockSize
+		if c0 < BlockSize {
+			c1++
+		}
+
+		v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]
+		v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]
+		v12 ^= c0
+		v13 ^= c1
+		v14 ^= flag
+
+		for j := range m {
+			m[j] = binary.LittleEndian.Uint64(blocks[i:])
+			i += 8
+		}
+
+		for j := range precomputed {
+			s := &(precomputed[j])
+
+			v0 += m[s[0]]
+			v0 += v4
+			v12 ^= v0
+			v12 = bits.RotateLeft64(v12, -32)
+			v8 += v12
+			v4 ^= v8
+			v4 = bits.RotateLeft64(v4, -24)
+			v1 += m[s[1]]
+			v1 += v5
+			v13 ^= v1
+			v13 = bits.RotateLeft64(v13, -32)
+			v9 += v13
+			v5 ^= v9
+			v5 = bits.RotateLeft64(v5, -24)
+			v2 += m[s[2]]
+			v2 += v6
+			v14 ^= v2
+			v14 = bits.RotateLeft64(v14, -32)
+			v10 += v14
+			v6 ^= v10
+			v6 = bits.RotateLeft64(v6, -24)
+			v3 += m[s[3]]
+			v3 += v7
+			v15 ^= v3
+			v15 = bits.RotateLeft64(v15, -32)
+			v11 += v15
+			v7 ^= v11
+			v7 = bits.RotateLeft64(v7, -24)
+
+			v0 += m[s[4]]
+			v0 += v4
+			v12 ^= v0
+			v12 = bits.RotateLeft64(v12, -16)
+			v8 += v12
+			v4 ^= v8
+			v4 = bits.RotateLeft64(v4, -63)
+			v1 += m[s[5]]
+			v1 += v5
+			v13 ^= v1
+			v13 = bits.RotateLeft64(v13, -16)
+			v9 += v13
+			v5 ^= v9
+			v5 = bits.RotateLeft64(v5, -63)
+			v2 += m[s[6]]
+			v2 += v6
+			v14 ^= v2
+			v14 = bits.RotateLeft64(v14, -16)
+			v10 += v14
+			v6 ^= v10
+			v6 = bits.RotateLeft64(v6, -63)
+			v3 += m[s[7]]
+			v3 += v7
+			v15 ^= v3
+			v15 = bits.RotateLeft64(v15, -16)
+			v11 += v15
+			v7 ^= v11
+			v7 = bits.RotateLeft64(v7, -63)
+
+			v0 += m[s[8]]
+			v0 += v5
+			v15 ^= v0
+			v15 = bits.RotateLeft64(v15, -32)
+			v10 += v15
+			v5 ^= v10
+			v5 = bits.RotateLeft64(v5, -24)
+			v1 += m[s[9]]
+			v1 += v6
+			v12 ^= v1
+			v12 = bits.RotateLeft64(v12, -32)
+			v11 += v12
+			v6 ^= v11
+			v6 = bits.RotateLeft64(v6, -24)
+			v2 += m[s[10]]
+			v2 += v7
+			v13 ^= v2
+			v13 = bits.RotateLeft64(v13, -32)
+			v8 += v13
+			v7 ^= v8
+			v7 = bits.RotateLeft64(v7, -24)
+			v3 += m[s[11]]
+			v3 += v4
+			v14 ^= v3
+			v14 = bits.RotateLeft64(v14, -32)
+			v9 += v14
+			v4 ^= v9
+			v4 = bits.RotateLeft64(v4, -24)
+
+			v0 += m[s[12]]
+			v0 += v5
+			v15 ^= v0
+			v15 = bits.RotateLeft64(v15, -16)
+			v10 += v15
+			v5 ^= v10
+			v5 = bits.RotateLeft64(v5, -63)
+			v1 += m[s[13]]
+			v1 += v6
+			v12 ^= v1
+			v12 = bits.RotateLeft64(v12, -16)
+			v11 += v12
+			v6 ^= v11
+			v6 = bits.RotateLeft64(v6, -63)
+			v2 += m[s[14]]
+			v2 += v7
+			v13 ^= v2
+			v13 = bits.RotateLeft64(v13, -16)
+			v8 += v13
+			v7 ^= v8
+			v7 = bits.RotateLeft64(v7, -63)
+			v3 += m[s[15]]
+			v3 += v4
+			v14 ^= v3
+			v14 = bits.RotateLeft64(v14, -16)
+			v9 += v14
+			v4 ^= v9
+			v4 = bits.RotateLeft64(v4, -63)
+
+		}
+
+		h[0] ^= v0 ^ v8
+		h[1] ^= v1 ^ v9
+		h[2] ^= v2 ^ v10
+		h[3] ^= v3 ^ v11
+		h[4] ^= v4 ^ v12
+		h[5] ^= v5 ^ v13
+		h[6] ^= v6 ^ v14
+		h[7] ^= v7 ^ v15
+	}
+	c[0], c[1] = c0, c1
+}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go
new file mode 100644
index 0000000..6e28668
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go
@@ -0,0 +1,11 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 || purego || !gc
+
+package blake2b
+
+func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
+	hashBlocksGeneric(h, c, flag, blocks)
+}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2x.go b/vendor/golang.org/x/crypto/blake2b/blake2x.go
new file mode 100644
index 0000000..52c414d
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2b/blake2x.go
@@ -0,0 +1,177 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package blake2b
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+)
+
+// XOF defines the interface to hash functions that
+// support arbitrary-length output.
+type XOF interface {
+	// Write absorbs more data into the hash's state. It panics if called
+	// after Read.
+	io.Writer
+
+	// Read reads more output from the hash. It returns io.EOF if the limit
+	// has been reached.
+	io.Reader
+
+	// Clone returns a copy of the XOF in its current state.
+	Clone() XOF
+
+	// Reset resets the XOF to its initial state.
+	Reset()
+}
+
+// OutputLengthUnknown can be used as the size argument to NewXOF to indicate
+// the length of the output is not known in advance.
+const OutputLengthUnknown = 0
+
+// magicUnknownOutputLength is a magic value for the output size that indicates
+// an unknown number of output bytes.
+const magicUnknownOutputLength = (1 << 32) - 1
+
+// maxOutputLength is the absolute maximum number of bytes to produce when the
+// number of output bytes is unknown.
+const maxOutputLength = (1 << 32) * 64
+
+// NewXOF creates a new variable-output-length hash. The hash either produce a
+// known number of bytes (1 <= size < 2**32-1), or an unknown number of bytes
+// (size == OutputLengthUnknown). In the latter case, an absolute limit of
+// 256GiB applies.
+//
+// A non-nil key turns the hash into a MAC. The key must between
+// zero and 32 bytes long.
+func NewXOF(size uint32, key []byte) (XOF, error) {
+	if len(key) > Size {
+		return nil, errKeySize
+	}
+	if size == magicUnknownOutputLength {
+		// 2^32-1 indicates an unknown number of bytes and thus isn't a
+		// valid length.
+		return nil, errors.New("blake2b: XOF length too large")
+	}
+	if size == OutputLengthUnknown {
+		size = magicUnknownOutputLength
+	}
+	x := &xof{
+		d: digest{
+			size:   Size,
+			keyLen: len(key),
+		},
+		length: size,
+	}
+	copy(x.d.key[:], key)
+	x.Reset()
+	return x, nil
+}
+
+type xof struct {
+	d                digest
+	length           uint32
+	remaining        uint64
+	cfg, root, block [Size]byte
+	offset           int
+	nodeOffset       uint32
+	readMode         bool
+}
+
+func (x *xof) Write(p []byte) (n int, err error) {
+	if x.readMode {
+		panic("blake2b: write to XOF after read")
+	}
+	return x.d.Write(p)
+}
+
+func (x *xof) Clone() XOF {
+	clone := *x
+	return &clone
+}
+
+func (x *xof) Reset() {
+	x.cfg[0] = byte(Size)
+	binary.LittleEndian.PutUint32(x.cfg[4:], uint32(Size)) // leaf length
+	binary.LittleEndian.PutUint32(x.cfg[12:], x.length)    // XOF length
+	x.cfg[17] = byte(Size)                                 // inner hash size
+
+	x.d.Reset()
+	x.d.h[1] ^= uint64(x.length) << 32
+
+	x.remaining = uint64(x.length)
+	if x.remaining == magicUnknownOutputLength {
+		x.remaining = maxOutputLength
+	}
+	x.offset, x.nodeOffset = 0, 0
+	x.readMode = false
+}
+
+func (x *xof) Read(p []byte) (n int, err error) {
+	if !x.readMode {
+		x.d.finalize(&x.root)
+		x.readMode = true
+	}
+
+	if x.remaining == 0 {
+		return 0, io.EOF
+	}
+
+	n = len(p)
+	if uint64(n) > x.remaining {
+		n = int(x.remaining)
+		p = p[:n]
+	}
+
+	if x.offset > 0 {
+		blockRemaining := Size - x.offset
+		if n < blockRemaining {
+			x.offset += copy(p, x.block[x.offset:])
+			x.remaining -= uint64(n)
+			return
+		}
+		copy(p, x.block[x.offset:])
+		p = p[blockRemaining:]
+		x.offset = 0
+		x.remaining -= uint64(blockRemaining)
+	}
+
+	for len(p) >= Size {
+		binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset)
+		x.nodeOffset++
+
+		x.d.initConfig(&x.cfg)
+		x.d.Write(x.root[:])
+		x.d.finalize(&x.block)
+
+		copy(p, x.block[:])
+		p = p[Size:]
+		x.remaining -= uint64(Size)
+	}
+
+	if todo := len(p); todo > 0 {
+		if x.remaining < uint64(Size) {
+			x.cfg[0] = byte(x.remaining)
+		}
+		binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset)
+		x.nodeOffset++
+
+		x.d.initConfig(&x.cfg)
+		x.d.Write(x.root[:])
+		x.d.finalize(&x.block)
+
+		x.offset = copy(p, x.block[:todo])
+		x.remaining -= uint64(todo)
+	}
+	return
+}
+
+func (d *digest) initConfig(cfg *[Size]byte) {
+	d.offset, d.c[0], d.c[1] = 0, 0, 0
+	for i := range d.h {
+		d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(cfg[i*8:])
+	}
+}
diff --git a/vendor/golang.org/x/crypto/blake2b/register.go b/vendor/golang.org/x/crypto/blake2b/register.go
new file mode 100644
index 0000000..54e446e
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2b/register.go
@@ -0,0 +1,30 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package blake2b
+
+import (
+	"crypto"
+	"hash"
+)
+
+func init() {
+	newHash256 := func() hash.Hash {
+		h, _ := New256(nil)
+		return h
+	}
+	newHash384 := func() hash.Hash {
+		h, _ := New384(nil)
+		return h
+	}
+
+	newHash512 := func() hash.Hash {
+		h, _ := New512(nil)
+		return h
+	}
+
+	crypto.RegisterHash(crypto.BLAKE2b_256, newHash256)
+	crypto.RegisterHash(crypto.BLAKE2b_384, newHash384)
+	crypto.RegisterHash(crypto.BLAKE2b_512, newHash512)
+}
diff --git a/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go b/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
new file mode 100644
index 0000000..28cd99c
--- /dev/null
+++ b/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
@@ -0,0 +1,77 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package pbkdf2 implements the key derivation function PBKDF2 as defined in RFC
+2898 / PKCS #5 v2.0.
+
+A key derivation function is useful when encrypting data based on a password
+or any other not-fully-random data. It uses a pseudorandom function to derive
+a secure encryption key based on the password.
+
+While v2.0 of the standard defines only one pseudorandom function to use,
+HMAC-SHA1, the drafted v2.1 specification allows use of all five FIPS Approved
+Hash Functions SHA-1, SHA-224, SHA-256, SHA-384 and SHA-512 for HMAC. To
+choose, you can pass the `New` functions from the different SHA packages to
+pbkdf2.Key.
+*/
+package pbkdf2
+
+import (
+	"crypto/hmac"
+	"hash"
+)
+
+// Key derives a key from the password, salt and iteration count, returning a
+// []byte of length keylen that can be used as cryptographic key. The key is
+// derived based on the method described as PBKDF2 with the HMAC variant using
+// the supplied hash function.
+//
+// For example, to use a HMAC-SHA-1 based PBKDF2 key derivation function, you
+// can get a derived key for e.g. AES-256 (which needs a 32-byte key) by
+// doing:
+//
+//	dk := pbkdf2.Key([]byte("some password"), salt, 4096, 32, sha1.New)
+//
+// Remember to get a good random salt. At least 8 bytes is recommended by the
+// RFC.
+//
+// Using a higher iteration count will increase the cost of an exhaustive
+// search but will also make derivation proportionally slower.
+func Key(password, salt []byte, iter, keyLen int, h func() hash.Hash) []byte {
+	prf := hmac.New(h, password)
+	hashLen := prf.Size()
+	numBlocks := (keyLen + hashLen - 1) / hashLen
+
+	var buf [4]byte
+	dk := make([]byte, 0, numBlocks*hashLen)
+	U := make([]byte, hashLen)
+	for block := 1; block <= numBlocks; block++ {
+		// N.B.: || means concatenation, ^ means XOR
+		// for each block T_i = U_1 ^ U_2 ^ ... ^ U_iter
+		// U_1 = PRF(password, salt || uint(i))
+		prf.Reset()
+		prf.Write(salt)
+		buf[0] = byte(block >> 24)
+		buf[1] = byte(block >> 16)
+		buf[2] = byte(block >> 8)
+		buf[3] = byte(block)
+		prf.Write(buf[:4])
+		dk = prf.Sum(dk)
+		T := dk[len(dk)-hashLen:]
+		copy(U, T)
+
+		// U_n = PRF(password, U_(n-1))
+		for n := 2; n <= iter; n++ {
+			prf.Reset()
+			prf.Write(U)
+			U = U[:0]
+			U = prf.Sum(U)
+			for x := range U {
+				T[x] ^= U[x]
+			}
+		}
+	}
+	return dk[:keyLen]
+}
diff --git a/vendor/golang.org/x/crypto/scrypt/scrypt.go b/vendor/golang.org/x/crypto/scrypt/scrypt.go
new file mode 100644
index 0000000..76fa40f
--- /dev/null
+++ b/vendor/golang.org/x/crypto/scrypt/scrypt.go
@@ -0,0 +1,212 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package scrypt implements the scrypt key derivation function as defined in
+// Colin Percival's paper "Stronger Key Derivation via Sequential Memory-Hard
+// Functions" (https://www.tarsnap.com/scrypt/scrypt.pdf).
+package scrypt
+
+import (
+	"crypto/sha256"
+	"encoding/binary"
+	"errors"
+	"math/bits"
+
+	"golang.org/x/crypto/pbkdf2"
+)
+
+const maxInt = int(^uint(0) >> 1)
+
+// blockCopy copies n numbers from src into dst.
+func blockCopy(dst, src []uint32, n int) {
+	copy(dst, src[:n])
+}
+
+// blockXOR XORs numbers from dst with n numbers from src.
+func blockXOR(dst, src []uint32, n int) {
+	for i, v := range src[:n] {
+		dst[i] ^= v
+	}
+}
+
+// salsaXOR applies Salsa20/8 to the XOR of 16 numbers from tmp and in,
+// and puts the result into both tmp and out.
+func salsaXOR(tmp *[16]uint32, in, out []uint32) {
+	w0 := tmp[0] ^ in[0]
+	w1 := tmp[1] ^ in[1]
+	w2 := tmp[2] ^ in[2]
+	w3 := tmp[3] ^ in[3]
+	w4 := tmp[4] ^ in[4]
+	w5 := tmp[5] ^ in[5]
+	w6 := tmp[6] ^ in[6]
+	w7 := tmp[7] ^ in[7]
+	w8 := tmp[8] ^ in[8]
+	w9 := tmp[9] ^ in[9]
+	w10 := tmp[10] ^ in[10]
+	w11 := tmp[11] ^ in[11]
+	w12 := tmp[12] ^ in[12]
+	w13 := tmp[13] ^ in[13]
+	w14 := tmp[14] ^ in[14]
+	w15 := tmp[15] ^ in[15]
+
+	x0, x1, x2, x3, x4, x5, x6, x7, x8 := w0, w1, w2, w3, w4, w5, w6, w7, w8
+	x9, x10, x11, x12, x13, x14, x15 := w9, w10, w11, w12, w13, w14, w15
+
+	for i := 0; i < 8; i += 2 {
+		x4 ^= bits.RotateLeft32(x0+x12, 7)
+		x8 ^= bits.RotateLeft32(x4+x0, 9)
+		x12 ^= bits.RotateLeft32(x8+x4, 13)
+		x0 ^= bits.RotateLeft32(x12+x8, 18)
+
+		x9 ^= bits.RotateLeft32(x5+x1, 7)
+		x13 ^= bits.RotateLeft32(x9+x5, 9)
+		x1 ^= bits.RotateLeft32(x13+x9, 13)
+		x5 ^= bits.RotateLeft32(x1+x13, 18)
+
+		x14 ^= bits.RotateLeft32(x10+x6, 7)
+		x2 ^= bits.RotateLeft32(x14+x10, 9)
+		x6 ^= bits.RotateLeft32(x2+x14, 13)
+		x10 ^= bits.RotateLeft32(x6+x2, 18)
+
+		x3 ^= bits.RotateLeft32(x15+x11, 7)
+		x7 ^= bits.RotateLeft32(x3+x15, 9)
+		x11 ^= bits.RotateLeft32(x7+x3, 13)
+		x15 ^= bits.RotateLeft32(x11+x7, 18)
+
+		x1 ^= bits.RotateLeft32(x0+x3, 7)
+		x2 ^= bits.RotateLeft32(x1+x0, 9)
+		x3 ^= bits.RotateLeft32(x2+x1, 13)
+		x0 ^= bits.RotateLeft32(x3+x2, 18)
+
+		x6 ^= bits.RotateLeft32(x5+x4, 7)
+		x7 ^= bits.RotateLeft32(x6+x5, 9)
+		x4 ^= bits.RotateLeft32(x7+x6, 13)
+		x5 ^= bits.RotateLeft32(x4+x7, 18)
+
+		x11 ^= bits.RotateLeft32(x10+x9, 7)
+		x8 ^= bits.RotateLeft32(x11+x10, 9)
+		x9 ^= bits.RotateLeft32(x8+x11, 13)
+		x10 ^= bits.RotateLeft32(x9+x8, 18)
+
+		x12 ^= bits.RotateLeft32(x15+x14, 7)
+		x13 ^= bits.RotateLeft32(x12+x15, 9)
+		x14 ^= bits.RotateLeft32(x13+x12, 13)
+		x15 ^= bits.RotateLeft32(x14+x13, 18)
+	}
+	x0 += w0
+	x1 += w1
+	x2 += w2
+	x3 += w3
+	x4 += w4
+	x5 += w5
+	x6 += w6
+	x7 += w7
+	x8 += w8
+	x9 += w9
+	x10 += w10
+	x11 += w11
+	x12 += w12
+	x13 += w13
+	x14 += w14
+	x15 += w15
+
+	out[0], tmp[0] = x0, x0
+	out[1], tmp[1] = x1, x1
+	out[2], tmp[2] = x2, x2
+	out[3], tmp[3] = x3, x3
+	out[4], tmp[4] = x4, x4
+	out[5], tmp[5] = x5, x5
+	out[6], tmp[6] = x6, x6
+	out[7], tmp[7] = x7, x7
+	out[8], tmp[8] = x8, x8
+	out[9], tmp[9] = x9, x9
+	out[10], tmp[10] = x10, x10
+	out[11], tmp[11] = x11, x11
+	out[12], tmp[12] = x12, x12
+	out[13], tmp[13] = x13, x13
+	out[14], tmp[14] = x14, x14
+	out[15], tmp[15] = x15, x15
+}
+
+func blockMix(tmp *[16]uint32, in, out []uint32, r int) {
+	blockCopy(tmp[:], in[(2*r-1)*16:], 16)
+	for i := 0; i < 2*r; i += 2 {
+		salsaXOR(tmp, in[i*16:], out[i*8:])
+		salsaXOR(tmp, in[i*16+16:], out[i*8+r*16:])
+	}
+}
+
+func integer(b []uint32, r int) uint64 {
+	j := (2*r - 1) * 16
+	return uint64(b[j]) | uint64(b[j+1])<<32
+}
+
+func smix(b []byte, r, N int, v, xy []uint32) {
+	var tmp [16]uint32
+	R := 32 * r
+	x := xy
+	y := xy[R:]
+
+	j := 0
+	for i := 0; i < R; i++ {
+		x[i] = binary.LittleEndian.Uint32(b[j:])
+		j += 4
+	}
+	for i := 0; i < N; i += 2 {
+		blockCopy(v[i*R:], x, R)
+		blockMix(&tmp, x, y, r)
+
+		blockCopy(v[(i+1)*R:], y, R)
+		blockMix(&tmp, y, x, r)
+	}
+	for i := 0; i < N; i += 2 {
+		j := int(integer(x, r) & uint64(N-1))
+		blockXOR(x, v[j*R:], R)
+		blockMix(&tmp, x, y, r)
+
+		j = int(integer(y, r) & uint64(N-1))
+		blockXOR(y, v[j*R:], R)
+		blockMix(&tmp, y, x, r)
+	}
+	j = 0
+	for _, v := range x[:R] {
+		binary.LittleEndian.PutUint32(b[j:], v)
+		j += 4
+	}
+}
+
+// Key derives a key from the password, salt, and cost parameters, returning
+// a byte slice of length keyLen that can be used as cryptographic key.
+//
+// N is a CPU/memory cost parameter, which must be a power of two greater than 1.
+// r and p must satisfy r * p < 2³⁰. If the parameters do not satisfy the
+// limits, the function returns a nil byte slice and an error.
+//
+// For example, you can get a derived key for e.g. AES-256 (which needs a
+// 32-byte key) by doing:
+//
+//	dk, err := scrypt.Key([]byte("some password"), salt, 32768, 8, 1, 32)
+//
+// The recommended parameters for interactive logins as of 2017 are N=32768, r=8
+// and p=1. The parameters N, r, and p should be increased as memory latency and
+// CPU parallelism increases; consider setting N to the highest power of 2 you
+// can derive within 100 milliseconds. Remember to get a good random salt.
+func Key(password, salt []byte, N, r, p, keyLen int) ([]byte, error) {
+	if N <= 1 || N&(N-1) != 0 {
+		return nil, errors.New("scrypt: N must be > 1 and a power of 2")
+	}
+	if uint64(r)*uint64(p) >= 1<<30 || r > maxInt/128/p || r > maxInt/256 || N > maxInt/128/r {
+		return nil, errors.New("scrypt: parameters are too large")
+	}
+
+	xy := make([]uint32, 64*r)
+	v := make([]uint32, 32*N*r)
+	b := pbkdf2.Key(password, salt, 1, p*128*r, sha256.New)
+
+	for i := 0; i < p; i++ {
+		smix(b[i*128*r:], r, N, v, xy)
+	}
+
+	return pbkdf2.Key(password, b, 1, keyLen, sha256.New), nil
+}
diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE
new file mode 100644
index 0000000..2a7cf70
--- /dev/null
+++ b/vendor/golang.org/x/sys/LICENSE
@@ -0,0 +1,27 @@
+Copyright 2009 The Go Authors.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google LLC nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/sys/PATENTS b/vendor/golang.org/x/sys/PATENTS
new file mode 100644
index 0000000..7330990
--- /dev/null
+++ b/vendor/golang.org/x/sys/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the Go project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of Go, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of Go.  This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation.  If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of Go or any code incorporated within this
+implementation of Go constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of Go
+shall terminate as of the date such litigation is filed.
diff --git a/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
new file mode 100644
index 0000000..269e173
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
@@ -0,0 +1,17 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+
+#include "textflag.h"
+
+//
+// System calls for ppc64, AIX are implemented in runtime/syscall_aix.go
+//
+
+TEXT ·syscall6(SB),NOSPLIT,$0-88
+	JMP	syscall·syscall6(SB)
+
+TEXT ·rawSyscall6(SB),NOSPLIT,$0-88
+	JMP	syscall·rawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/cpu/byteorder.go b/vendor/golang.org/x/sys/cpu/byteorder.go
new file mode 100644
index 0000000..271055b
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/byteorder.go
@@ -0,0 +1,66 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"runtime"
+)
+
+// byteOrder is a subset of encoding/binary.ByteOrder.
+type byteOrder interface {
+	Uint32([]byte) uint32
+	Uint64([]byte) uint64
+}
+
+type littleEndian struct{}
+type bigEndian struct{}
+
+func (littleEndian) Uint32(b []byte) uint32 {
+	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func (littleEndian) Uint64(b []byte) uint64 {
+	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func (bigEndian) Uint32(b []byte) uint32 {
+	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
+}
+
+func (bigEndian) Uint64(b []byte) uint64 {
+	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
+		uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
+}
+
+// hostByteOrder returns littleEndian on little-endian machines and
+// bigEndian on big-endian machines.
+func hostByteOrder() byteOrder {
+	switch runtime.GOARCH {
+	case "386", "amd64", "amd64p32",
+		"alpha",
+		"arm", "arm64",
+		"loong64",
+		"mipsle", "mips64le", "mips64p32le",
+		"nios2",
+		"ppc64le",
+		"riscv", "riscv64",
+		"sh":
+		return littleEndian{}
+	case "armbe", "arm64be",
+		"m68k",
+		"mips", "mips64", "mips64p32",
+		"ppc", "ppc64",
+		"s390", "s390x",
+		"shbe",
+		"sparc", "sparc64":
+		return bigEndian{}
+	}
+	panic("unknown architecture")
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go
new file mode 100644
index 0000000..02609d5
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu.go
@@ -0,0 +1,312 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cpu implements processor feature detection for
+// various CPU architectures.
+package cpu
+
+import (
+	"os"
+	"strings"
+)
+
+// Initialized reports whether the CPU features were initialized.
+//
+// For some GOOS/GOARCH combinations initialization of the CPU features depends
+// on reading an operating specific file, e.g. /proc/self/auxv on linux/arm
+// Initialized will report false if reading the file fails.
+var Initialized bool
+
+// CacheLinePad is used to pad structs to avoid false sharing.
+type CacheLinePad struct{ _ [cacheLineSize]byte }
+
+// X86 contains the supported CPU features of the
+// current X86/AMD64 platform. If the current platform
+// is not X86/AMD64 then all feature flags are false.
+//
+// X86 is padded to avoid false sharing. Further the HasAVX
+// and HasAVX2 are only set if the OS supports XMM and YMM
+// registers in addition to the CPUID feature bit being set.
+var X86 struct {
+	_                   CacheLinePad
+	HasAES              bool // AES hardware implementation (AES NI)
+	HasADX              bool // Multi-precision add-carry instruction extensions
+	HasAVX              bool // Advanced vector extension
+	HasAVX2             bool // Advanced vector extension 2
+	HasAVX512           bool // Advanced vector extension 512
+	HasAVX512F          bool // Advanced vector extension 512 Foundation Instructions
+	HasAVX512CD         bool // Advanced vector extension 512 Conflict Detection Instructions
+	HasAVX512ER         bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
+	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions
+	HasAVX512VL         bool // Advanced vector extension 512 Vector Length Extensions
+	HasAVX512BW         bool // Advanced vector extension 512 Byte and Word Instructions
+	HasAVX512DQ         bool // Advanced vector extension 512 Doubleword and Quadword Instructions
+	HasAVX512IFMA       bool // Advanced vector extension 512 Integer Fused Multiply Add
+	HasAVX512VBMI       bool // Advanced vector extension 512 Vector Byte Manipulation Instructions
+	HasAVX5124VNNIW     bool // Advanced vector extension 512 Vector Neural Network Instructions Word variable precision
+	HasAVX5124FMAPS     bool // Advanced vector extension 512 Fused Multiply Accumulation Packed Single precision
+	HasAVX512VPOPCNTDQ  bool // Advanced vector extension 512 Double and quad word population count instructions
+	HasAVX512VPCLMULQDQ bool // Advanced vector extension 512 Vector carry-less multiply operations
+	HasAVX512VNNI       bool // Advanced vector extension 512 Vector Neural Network Instructions
+	HasAVX512GFNI       bool // Advanced vector extension 512 Galois field New Instructions
+	HasAVX512VAES       bool // Advanced vector extension 512 Vector AES instructions
+	HasAVX512VBMI2      bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
+	HasAVX512BITALG     bool // Advanced vector extension 512 Bit Algorithms
+	HasAVX512BF16       bool // Advanced vector extension 512 BFloat16 Instructions
+	HasAMXTile          bool // Advanced Matrix Extension Tile instructions
+	HasAMXInt8          bool // Advanced Matrix Extension Int8 instructions
+	HasAMXBF16          bool // Advanced Matrix Extension BFloat16 instructions
+	HasBMI1             bool // Bit manipulation instruction set 1
+	HasBMI2             bool // Bit manipulation instruction set 2
+	HasCX16             bool // Compare and exchange 16 Bytes
+	HasERMS             bool // Enhanced REP for MOVSB and STOSB
+	HasFMA              bool // Fused-multiply-add instructions
+	HasOSXSAVE          bool // OS supports XSAVE/XRESTOR for saving/restoring XMM registers.
+	HasPCLMULQDQ        bool // PCLMULQDQ instruction - most often used for AES-GCM
+	HasPOPCNT           bool // Hamming weight instruction POPCNT.
+	HasRDRAND           bool // RDRAND instruction (on-chip random number generator)
+	HasRDSEED           bool // RDSEED instruction (on-chip random number generator)
+	HasSSE2             bool // Streaming SIMD extension 2 (always available on amd64)
+	HasSSE3             bool // Streaming SIMD extension 3
+	HasSSSE3            bool // Supplemental streaming SIMD extension 3
+	HasSSE41            bool // Streaming SIMD extension 4 and 4.1
+	HasSSE42            bool // Streaming SIMD extension 4 and 4.2
+	_                   CacheLinePad
+}
+
+// ARM64 contains the supported CPU features of the
+// current ARMv8(aarch64) platform. If the current platform
+// is not arm64 then all feature flags are false.
+var ARM64 struct {
+	_           CacheLinePad
+	HasFP       bool // Floating-point instruction set (always available)
+	HasASIMD    bool // Advanced SIMD (always available)
+	HasEVTSTRM  bool // Event stream support
+	HasAES      bool // AES hardware implementation
+	HasPMULL    bool // Polynomial multiplication instruction set
+	HasSHA1     bool // SHA1 hardware implementation
+	HasSHA2     bool // SHA2 hardware implementation
+	HasCRC32    bool // CRC32 hardware implementation
+	HasATOMICS  bool // Atomic memory operation instruction set
+	HasFPHP     bool // Half precision floating-point instruction set
+	HasASIMDHP  bool // Advanced SIMD half precision instruction set
+	HasCPUID    bool // CPUID identification scheme registers
+	HasASIMDRDM bool // Rounding double multiply add/subtract instruction set
+	HasJSCVT    bool // Javascript conversion from floating-point to integer
+	HasFCMA     bool // Floating-point multiplication and addition of complex numbers
+	HasLRCPC    bool // Release Consistent processor consistent support
+	HasDCPOP    bool // Persistent memory support
+	HasSHA3     bool // SHA3 hardware implementation
+	HasSM3      bool // SM3 hardware implementation
+	HasSM4      bool // SM4 hardware implementation
+	HasASIMDDP  bool // Advanced SIMD double precision instruction set
+	HasSHA512   bool // SHA512 hardware implementation
+	HasSVE      bool // Scalable Vector Extensions
+	HasSVE2     bool // Scalable Vector Extensions 2
+	HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
+	HasDIT      bool // Data Independent Timing support
+	HasI8MM     bool // Advanced SIMD Int8 matrix multiplication instructions
+	_           CacheLinePad
+}
+
+// ARM contains the supported CPU features of the current ARM (32-bit) platform.
+// All feature flags are false if:
+//  1. the current platform is not arm, or
+//  2. the current operating system is not Linux.
+var ARM struct {
+	_           CacheLinePad
+	HasSWP      bool // SWP instruction support
+	HasHALF     bool // Half-word load and store support
+	HasTHUMB    bool // ARM Thumb instruction set
+	Has26BIT    bool // Address space limited to 26-bits
+	HasFASTMUL  bool // 32-bit operand, 64-bit result multiplication support
+	HasFPA      bool // Floating point arithmetic support
+	HasVFP      bool // Vector floating point support
+	HasEDSP     bool // DSP Extensions support
+	HasJAVA     bool // Java instruction set
+	HasIWMMXT   bool // Intel Wireless MMX technology support
+	HasCRUNCH   bool // MaverickCrunch context switching and handling
+	HasTHUMBEE  bool // Thumb EE instruction set
+	HasNEON     bool // NEON instruction set
+	HasVFPv3    bool // Vector floating point version 3 support
+	HasVFPv3D16 bool // Vector floating point version 3 D8-D15
+	HasTLS      bool // Thread local storage support
+	HasVFPv4    bool // Vector floating point version 4 support
+	HasIDIVA    bool // Integer divide instruction support in ARM mode
+	HasIDIVT    bool // Integer divide instruction support in Thumb mode
+	HasVFPD32   bool // Vector floating point version 3 D15-D31
+	HasLPAE     bool // Large Physical Address Extensions
+	HasEVTSTRM  bool // Event stream support
+	HasAES      bool // AES hardware implementation
+	HasPMULL    bool // Polynomial multiplication instruction set
+	HasSHA1     bool // SHA1 hardware implementation
+	HasSHA2     bool // SHA2 hardware implementation
+	HasCRC32    bool // CRC32 hardware implementation
+	_           CacheLinePad
+}
+
+// MIPS64X contains the supported CPU features of the current mips64/mips64le
+// platforms. If the current platform is not mips64/mips64le or the current
+// operating system is not Linux then all feature flags are false.
+var MIPS64X struct {
+	_      CacheLinePad
+	HasMSA bool // MIPS SIMD architecture
+	_      CacheLinePad
+}
+
+// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms.
+// If the current platform is not ppc64/ppc64le then all feature flags are false.
+//
+// For ppc64/ppc64le, it is safe to check only for ISA level starting on ISA v3.00,
+// since there are no optional categories. There are some exceptions that also
+// require kernel support to work (DARN, SCV), so there are feature bits for
+// those as well. The struct is padded to avoid false sharing.
+var PPC64 struct {
+	_        CacheLinePad
+	HasDARN  bool // Hardware random number generator (requires kernel enablement)
+	HasSCV   bool // Syscall vectored (requires kernel enablement)
+	IsPOWER8 bool // ISA v2.07 (POWER8)
+	IsPOWER9 bool // ISA v3.00 (POWER9), implies IsPOWER8
+	_        CacheLinePad
+}
+
+// S390X contains the supported CPU features of the current IBM Z
+// (s390x) platform. If the current platform is not IBM Z then all
+// feature flags are false.
+//
+// S390X is padded to avoid false sharing. Further HasVX is only set
+// if the OS supports vector registers in addition to the STFLE
+// feature bit being set.
+var S390X struct {
+	_         CacheLinePad
+	HasZARCH  bool // z/Architecture mode is active [mandatory]
+	HasSTFLE  bool // store facility list extended
+	HasLDISP  bool // long (20-bit) displacements
+	HasEIMM   bool // 32-bit immediates
+	HasDFP    bool // decimal floating point
+	HasETF3EH bool // ETF-3 enhanced
+	HasMSA    bool // message security assist (CPACF)
+	HasAES    bool // KM-AES{128,192,256} functions
+	HasAESCBC bool // KMC-AES{128,192,256} functions
+	HasAESCTR bool // KMCTR-AES{128,192,256} functions
+	HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
+	HasGHASH  bool // KIMD-GHASH function
+	HasSHA1   bool // K{I,L}MD-SHA-1 functions
+	HasSHA256 bool // K{I,L}MD-SHA-256 functions
+	HasSHA512 bool // K{I,L}MD-SHA-512 functions
+	HasSHA3   bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
+	HasVX     bool // vector facility
+	HasVXE    bool // vector-enhancements facility 1
+	_         CacheLinePad
+}
+
+// RISCV64 contains the supported CPU features and performance characteristics for riscv64
+// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate
+// the presence of RISC-V extensions.
+//
+// It is safe to assume that all the RV64G extensions are supported and so they are omitted from
+// this structure. As riscv64 Go programs require at least RV64G, the code that populates
+// this structure cannot run successfully if some of the RV64G extensions are missing.
+// The struct is padded to avoid false sharing.
+var RISCV64 struct {
+	_                 CacheLinePad
+	HasFastMisaligned bool // Fast misaligned accesses
+	HasC              bool // Compressed instruction-set extension
+	HasV              bool // Vector extension compatible with RVV 1.0
+	HasZba            bool // Address generation instructions extension
+	HasZbb            bool // Basic bit-manipulation extension
+	HasZbs            bool // Single-bit instructions extension
+	_                 CacheLinePad
+}
+
+func init() {
+	archInit()
+	initOptions()
+	processOptions()
+}
+
+// options contains the cpu debug options that can be used in GODEBUG.
+// Options are arch dependent and are added by the arch specific initOptions functions.
+// Features that are mandatory for the specific GOARCH should have the Required field set
+// (e.g. SSE2 on amd64).
+var options []option
+
+// Option names should be lower case. e.g. avx instead of AVX.
+type option struct {
+	Name      string
+	Feature   *bool
+	Specified bool // whether feature value was specified in GODEBUG
+	Enable    bool // whether feature should be enabled
+	Required  bool // whether feature is mandatory and can not be disabled
+}
+
+func processOptions() {
+	env := os.Getenv("GODEBUG")
+field:
+	for env != "" {
+		field := ""
+		i := strings.IndexByte(env, ',')
+		if i < 0 {
+			field, env = env, ""
+		} else {
+			field, env = env[:i], env[i+1:]
+		}
+		if len(field) < 4 || field[:4] != "cpu." {
+			continue
+		}
+		i = strings.IndexByte(field, '=')
+		if i < 0 {
+			print("GODEBUG sys/cpu: no value specified for \"", field, "\"\n")
+			continue
+		}
+		key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
+
+		var enable bool
+		switch value {
+		case "on":
+			enable = true
+		case "off":
+			enable = false
+		default:
+			print("GODEBUG sys/cpu: value \"", value, "\" not supported for cpu option \"", key, "\"\n")
+			continue field
+		}
+
+		if key == "all" {
+			for i := range options {
+				options[i].Specified = true
+				options[i].Enable = enable || options[i].Required
+			}
+			continue field
+		}
+
+		for i := range options {
+			if options[i].Name == key {
+				options[i].Specified = true
+				options[i].Enable = enable
+				continue field
+			}
+		}
+
+		print("GODEBUG sys/cpu: unknown cpu feature \"", key, "\"\n")
+	}
+
+	for _, o := range options {
+		if !o.Specified {
+			continue
+		}
+
+		if o.Enable && !*o.Feature {
+			print("GODEBUG sys/cpu: can not enable \"", o.Name, "\", missing CPU support\n")
+			continue
+		}
+
+		if !o.Enable && o.Required {
+			print("GODEBUG sys/cpu: can not disable \"", o.Name, "\", required CPU feature\n")
+			continue
+		}
+
+		*o.Feature = o.Enable
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_aix.go b/vendor/golang.org/x/sys/cpu/cpu_aix.go
new file mode 100644
index 0000000..9bf0c32
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_aix.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix
+
+package cpu
+
+const (
+	// getsystemcfg constants
+	_SC_IMPL     = 2
+	_IMPL_POWER8 = 0x10000
+	_IMPL_POWER9 = 0x20000
+)
+
+func archInit() {
+	impl := getsystemcfg(_SC_IMPL)
+	if impl&_IMPL_POWER8 != 0 {
+		PPC64.IsPOWER8 = true
+	}
+	if impl&_IMPL_POWER9 != 0 {
+		PPC64.IsPOWER8 = true
+		PPC64.IsPOWER9 = true
+	}
+
+	Initialized = true
+}
+
+func getsystemcfg(label int) (n uint64) {
+	r0, _ := callgetsystemcfg(label)
+	n = uint64(r0)
+	return
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm.go b/vendor/golang.org/x/sys/cpu/cpu_arm.go
new file mode 100644
index 0000000..301b752
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm.go
@@ -0,0 +1,73 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const cacheLineSize = 32
+
+// HWCAP/HWCAP2 bits.
+// These are specific to Linux.
+const (
+	hwcap_SWP       = 1 << 0
+	hwcap_HALF      = 1 << 1
+	hwcap_THUMB     = 1 << 2
+	hwcap_26BIT     = 1 << 3
+	hwcap_FAST_MULT = 1 << 4
+	hwcap_FPA       = 1 << 5
+	hwcap_VFP       = 1 << 6
+	hwcap_EDSP      = 1 << 7
+	hwcap_JAVA      = 1 << 8
+	hwcap_IWMMXT    = 1 << 9
+	hwcap_CRUNCH    = 1 << 10
+	hwcap_THUMBEE   = 1 << 11
+	hwcap_NEON      = 1 << 12
+	hwcap_VFPv3     = 1 << 13
+	hwcap_VFPv3D16  = 1 << 14
+	hwcap_TLS       = 1 << 15
+	hwcap_VFPv4     = 1 << 16
+	hwcap_IDIVA     = 1 << 17
+	hwcap_IDIVT     = 1 << 18
+	hwcap_VFPD32    = 1 << 19
+	hwcap_LPAE      = 1 << 20
+	hwcap_EVTSTRM   = 1 << 21
+
+	hwcap2_AES   = 1 << 0
+	hwcap2_PMULL = 1 << 1
+	hwcap2_SHA1  = 1 << 2
+	hwcap2_SHA2  = 1 << 3
+	hwcap2_CRC32 = 1 << 4
+)
+
+func initOptions() {
+	options = []option{
+		{Name: "pmull", Feature: &ARM.HasPMULL},
+		{Name: "sha1", Feature: &ARM.HasSHA1},
+		{Name: "sha2", Feature: &ARM.HasSHA2},
+		{Name: "swp", Feature: &ARM.HasSWP},
+		{Name: "thumb", Feature: &ARM.HasTHUMB},
+		{Name: "thumbee", Feature: &ARM.HasTHUMBEE},
+		{Name: "tls", Feature: &ARM.HasTLS},
+		{Name: "vfp", Feature: &ARM.HasVFP},
+		{Name: "vfpd32", Feature: &ARM.HasVFPD32},
+		{Name: "vfpv3", Feature: &ARM.HasVFPv3},
+		{Name: "vfpv3d16", Feature: &ARM.HasVFPv3D16},
+		{Name: "vfpv4", Feature: &ARM.HasVFPv4},
+		{Name: "half", Feature: &ARM.HasHALF},
+		{Name: "26bit", Feature: &ARM.Has26BIT},
+		{Name: "fastmul", Feature: &ARM.HasFASTMUL},
+		{Name: "fpa", Feature: &ARM.HasFPA},
+		{Name: "edsp", Feature: &ARM.HasEDSP},
+		{Name: "java", Feature: &ARM.HasJAVA},
+		{Name: "iwmmxt", Feature: &ARM.HasIWMMXT},
+		{Name: "crunch", Feature: &ARM.HasCRUNCH},
+		{Name: "neon", Feature: &ARM.HasNEON},
+		{Name: "idivt", Feature: &ARM.HasIDIVT},
+		{Name: "idiva", Feature: &ARM.HasIDIVA},
+		{Name: "lpae", Feature: &ARM.HasLPAE},
+		{Name: "evtstrm", Feature: &ARM.HasEVTSTRM},
+		{Name: "aes", Feature: &ARM.HasAES},
+		{Name: "crc32", Feature: &ARM.HasCRC32},
+	}
+
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
new file mode 100644
index 0000000..af2aa99
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
@@ -0,0 +1,194 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import "runtime"
+
+// cacheLineSize is used to prevent false sharing of cache lines.
+// We choose 128 because Apple Silicon, a.k.a. M1, has 128-byte cache line size.
+// It doesn't cost much and is much more future-proof.
+const cacheLineSize = 128
+
+func initOptions() {
+	options = []option{
+		{Name: "fp", Feature: &ARM64.HasFP},
+		{Name: "asimd", Feature: &ARM64.HasASIMD},
+		{Name: "evstrm", Feature: &ARM64.HasEVTSTRM},
+		{Name: "aes", Feature: &ARM64.HasAES},
+		{Name: "fphp", Feature: &ARM64.HasFPHP},
+		{Name: "jscvt", Feature: &ARM64.HasJSCVT},
+		{Name: "lrcpc", Feature: &ARM64.HasLRCPC},
+		{Name: "pmull", Feature: &ARM64.HasPMULL},
+		{Name: "sha1", Feature: &ARM64.HasSHA1},
+		{Name: "sha2", Feature: &ARM64.HasSHA2},
+		{Name: "sha3", Feature: &ARM64.HasSHA3},
+		{Name: "sha512", Feature: &ARM64.HasSHA512},
+		{Name: "sm3", Feature: &ARM64.HasSM3},
+		{Name: "sm4", Feature: &ARM64.HasSM4},
+		{Name: "sve", Feature: &ARM64.HasSVE},
+		{Name: "sve2", Feature: &ARM64.HasSVE2},
+		{Name: "crc32", Feature: &ARM64.HasCRC32},
+		{Name: "atomics", Feature: &ARM64.HasATOMICS},
+		{Name: "asimdhp", Feature: &ARM64.HasASIMDHP},
+		{Name: "cpuid", Feature: &ARM64.HasCPUID},
+		{Name: "asimrdm", Feature: &ARM64.HasASIMDRDM},
+		{Name: "fcma", Feature: &ARM64.HasFCMA},
+		{Name: "dcpop", Feature: &ARM64.HasDCPOP},
+		{Name: "asimddp", Feature: &ARM64.HasASIMDDP},
+		{Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM},
+		{Name: "dit", Feature: &ARM64.HasDIT},
+		{Name: "i8mm", Feature: &ARM64.HasI8MM},
+	}
+}
+
+func archInit() {
+	switch runtime.GOOS {
+	case "freebsd":
+		readARM64Registers()
+	case "linux", "netbsd", "openbsd":
+		doinit()
+	default:
+		// Many platforms don't seem to allow reading these registers.
+		setMinimalFeatures()
+	}
+}
+
+// setMinimalFeatures fakes the minimal ARM64 features expected by
+// TestARM64minimalFeatures.
+func setMinimalFeatures() {
+	ARM64.HasASIMD = true
+	ARM64.HasFP = true
+}
+
+func readARM64Registers() {
+	Initialized = true
+
+	parseARM64SystemRegisters(getisar0(), getisar1(), getpfr0())
+}
+
+func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) {
+	// ID_AA64ISAR0_EL1
+	switch extractBits(isar0, 4, 7) {
+	case 1:
+		ARM64.HasAES = true
+	case 2:
+		ARM64.HasAES = true
+		ARM64.HasPMULL = true
+	}
+
+	switch extractBits(isar0, 8, 11) {
+	case 1:
+		ARM64.HasSHA1 = true
+	}
+
+	switch extractBits(isar0, 12, 15) {
+	case 1:
+		ARM64.HasSHA2 = true
+	case 2:
+		ARM64.HasSHA2 = true
+		ARM64.HasSHA512 = true
+	}
+
+	switch extractBits(isar0, 16, 19) {
+	case 1:
+		ARM64.HasCRC32 = true
+	}
+
+	switch extractBits(isar0, 20, 23) {
+	case 2:
+		ARM64.HasATOMICS = true
+	}
+
+	switch extractBits(isar0, 28, 31) {
+	case 1:
+		ARM64.HasASIMDRDM = true
+	}
+
+	switch extractBits(isar0, 32, 35) {
+	case 1:
+		ARM64.HasSHA3 = true
+	}
+
+	switch extractBits(isar0, 36, 39) {
+	case 1:
+		ARM64.HasSM3 = true
+	}
+
+	switch extractBits(isar0, 40, 43) {
+	case 1:
+		ARM64.HasSM4 = true
+	}
+
+	switch extractBits(isar0, 44, 47) {
+	case 1:
+		ARM64.HasASIMDDP = true
+	}
+
+	// ID_AA64ISAR1_EL1
+	switch extractBits(isar1, 0, 3) {
+	case 1:
+		ARM64.HasDCPOP = true
+	}
+
+	switch extractBits(isar1, 12, 15) {
+	case 1:
+		ARM64.HasJSCVT = true
+	}
+
+	switch extractBits(isar1, 16, 19) {
+	case 1:
+		ARM64.HasFCMA = true
+	}
+
+	switch extractBits(isar1, 20, 23) {
+	case 1:
+		ARM64.HasLRCPC = true
+	}
+
+	switch extractBits(isar1, 52, 55) {
+	case 1:
+		ARM64.HasI8MM = true
+	}
+
+	// ID_AA64PFR0_EL1
+	switch extractBits(pfr0, 16, 19) {
+	case 0:
+		ARM64.HasFP = true
+	case 1:
+		ARM64.HasFP = true
+		ARM64.HasFPHP = true
+	}
+
+	switch extractBits(pfr0, 20, 23) {
+	case 0:
+		ARM64.HasASIMD = true
+	case 1:
+		ARM64.HasASIMD = true
+		ARM64.HasASIMDHP = true
+	}
+
+	switch extractBits(pfr0, 32, 35) {
+	case 1:
+		ARM64.HasSVE = true
+
+		parseARM64SVERegister(getzfr0())
+	}
+
+	switch extractBits(pfr0, 48, 51) {
+	case 1:
+		ARM64.HasDIT = true
+	}
+}
+
+func parseARM64SVERegister(zfr0 uint64) {
+	switch extractBits(zfr0, 0, 3) {
+	case 1:
+		ARM64.HasSVE2 = true
+	}
+}
+
+func extractBits(data uint64, start, end uint) uint {
+	return (uint)(data>>start) & ((1 << (end - start + 1)) - 1)
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.s b/vendor/golang.org/x/sys/cpu/cpu_arm64.s
new file mode 100644
index 0000000..22cc998
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.s
@@ -0,0 +1,39 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+
+#include "textflag.h"
+
+// func getisar0() uint64
+TEXT ·getisar0(SB),NOSPLIT,$0-8
+	// get Instruction Set Attributes 0 into x0
+	// mrs x0, ID_AA64ISAR0_EL1 = d5380600
+	WORD	$0xd5380600
+	MOVD	R0, ret+0(FP)
+	RET
+
+// func getisar1() uint64
+TEXT ·getisar1(SB),NOSPLIT,$0-8
+	// get Instruction Set Attributes 1 into x0
+	// mrs x0, ID_AA64ISAR1_EL1 = d5380620
+	WORD	$0xd5380620
+	MOVD	R0, ret+0(FP)
+	RET
+
+// func getpfr0() uint64
+TEXT ·getpfr0(SB),NOSPLIT,$0-8
+	// get Processor Feature Register 0 into x0
+	// mrs x0, ID_AA64PFR0_EL1 = d5380400
+	WORD	$0xd5380400
+	MOVD	R0, ret+0(FP)
+	RET
+
+// func getzfr0() uint64
+TEXT ·getzfr0(SB),NOSPLIT,$0-8
+	// get SVE Feature Register 0 into x0
+	// mrs	x0, ID_AA64ZFR0_EL1 = d5380480
+	WORD $0xd5380480
+	MOVD	R0, ret+0(FP)
+	RET
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
new file mode 100644
index 0000000..6ac6e1e
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
@@ -0,0 +1,12 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+
+package cpu
+
+func getisar0() uint64
+func getisar1() uint64
+func getpfr0() uint64
+func getzfr0() uint64
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
new file mode 100644
index 0000000..c8ae6dd
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
@@ -0,0 +1,21 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+
+package cpu
+
+// haveAsmFunctions reports whether the other functions in this file can
+// be safely called.
+func haveAsmFunctions() bool { return true }
+
+// The following feature detection functions are defined in cpu_s390x.s.
+// They are likely to be expensive to call so the results should be cached.
+func stfle() facilityList
+func kmQuery() queryResult
+func kmcQuery() queryResult
+func kmctrQuery() queryResult
+func kmaQuery() queryResult
+func kimdQuery() queryResult
+func klmdQuery() queryResult
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
new file mode 100644
index 0000000..910728f
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
@@ -0,0 +1,15 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (386 || amd64 || amd64p32) && gc
+
+package cpu
+
+// cpuid is implemented in cpu_x86.s for gc compiler
+// and in cpu_gccgo.c for gccgo.
+func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
+
+// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
+// and in cpu_gccgo.c for gccgo.
+func xgetbv() (eax, edx uint32)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
new file mode 100644
index 0000000..7f19467
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
@@ -0,0 +1,11 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gccgo
+
+package cpu
+
+func getisar0() uint64 { return 0 }
+func getisar1() uint64 { return 0 }
+func getpfr0() uint64  { return 0 }
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
new file mode 100644
index 0000000..9526d2c
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
@@ -0,0 +1,22 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gccgo
+
+package cpu
+
+// haveAsmFunctions reports whether the other functions in this file can
+// be safely called.
+func haveAsmFunctions() bool { return false }
+
+// TODO(mundaym): the following feature detection functions are currently
+// stubs. See https://golang.org/cl/162887 for how to fix this.
+// They are likely to be expensive to call so the results should be cached.
+func stfle() facilityList     { panic("not implemented for gccgo") }
+func kmQuery() queryResult    { panic("not implemented for gccgo") }
+func kmcQuery() queryResult   { panic("not implemented for gccgo") }
+func kmctrQuery() queryResult { panic("not implemented for gccgo") }
+func kmaQuery() queryResult   { panic("not implemented for gccgo") }
+func kimdQuery() queryResult  { panic("not implemented for gccgo") }
+func klmdQuery() queryResult  { panic("not implemented for gccgo") }
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
new file mode 100644
index 0000000..3f73a05
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
@@ -0,0 +1,37 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (386 || amd64 || amd64p32) && gccgo
+
+#include <cpuid.h>
+#include <stdint.h>
+#include <x86intrin.h>
+
+// Need to wrap __get_cpuid_count because it's declared as static.
+int
+gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf,
+                   uint32_t *eax, uint32_t *ebx,
+                   uint32_t *ecx, uint32_t *edx)
+{
+	return __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
+}
+
+#pragma GCC diagnostic ignored "-Wunknown-pragmas"
+#pragma GCC push_options
+#pragma GCC target("xsave")
+#pragma clang attribute push (__attribute__((target("xsave"))), apply_to=function)
+
+// xgetbv reads the contents of an XCR (Extended Control Register)
+// specified in the ECX register into registers EDX:EAX.
+// Currently, the only supported value for XCR is 0.
+void
+gccgoXgetbv(uint32_t *eax, uint32_t *edx)
+{
+	uint64_t v = _xgetbv(0);
+	*eax = v & 0xffffffff;
+	*edx = v >> 32;
+}
+
+#pragma clang attribute pop
+#pragma GCC pop_options
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
new file mode 100644
index 0000000..99c60fe
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
@@ -0,0 +1,31 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (386 || amd64 || amd64p32) && gccgo
+
+package cpu
+
+//extern gccgoGetCpuidCount
+func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32)
+
+func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) {
+	var a, b, c, d uint32
+	gccgoGetCpuidCount(eaxArg, ecxArg, &a, &b, &c, &d)
+	return a, b, c, d
+}
+
+//extern gccgoXgetbv
+func gccgoXgetbv(eax, edx *uint32)
+
+func xgetbv() (eax, edx uint32) {
+	var a, d uint32
+	gccgoXgetbv(&a, &d)
+	return a, d
+}
+
+// gccgo doesn't build on Darwin, per:
+// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76
+func darwinSupportsAVX512() bool {
+	return false
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux.go b/vendor/golang.org/x/sys/cpu/cpu_linux.go
new file mode 100644
index 0000000..743eb54
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux.go
@@ -0,0 +1,15 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !386 && !amd64 && !amd64p32 && !arm64
+
+package cpu
+
+func archInit() {
+	if err := readHWCAP(); err != nil {
+		return
+	}
+	doinit()
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go
new file mode 100644
index 0000000..2057006
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go
@@ -0,0 +1,39 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+func doinit() {
+	ARM.HasSWP = isSet(hwCap, hwcap_SWP)
+	ARM.HasHALF = isSet(hwCap, hwcap_HALF)
+	ARM.HasTHUMB = isSet(hwCap, hwcap_THUMB)
+	ARM.Has26BIT = isSet(hwCap, hwcap_26BIT)
+	ARM.HasFASTMUL = isSet(hwCap, hwcap_FAST_MULT)
+	ARM.HasFPA = isSet(hwCap, hwcap_FPA)
+	ARM.HasVFP = isSet(hwCap, hwcap_VFP)
+	ARM.HasEDSP = isSet(hwCap, hwcap_EDSP)
+	ARM.HasJAVA = isSet(hwCap, hwcap_JAVA)
+	ARM.HasIWMMXT = isSet(hwCap, hwcap_IWMMXT)
+	ARM.HasCRUNCH = isSet(hwCap, hwcap_CRUNCH)
+	ARM.HasTHUMBEE = isSet(hwCap, hwcap_THUMBEE)
+	ARM.HasNEON = isSet(hwCap, hwcap_NEON)
+	ARM.HasVFPv3 = isSet(hwCap, hwcap_VFPv3)
+	ARM.HasVFPv3D16 = isSet(hwCap, hwcap_VFPv3D16)
+	ARM.HasTLS = isSet(hwCap, hwcap_TLS)
+	ARM.HasVFPv4 = isSet(hwCap, hwcap_VFPv4)
+	ARM.HasIDIVA = isSet(hwCap, hwcap_IDIVA)
+	ARM.HasIDIVT = isSet(hwCap, hwcap_IDIVT)
+	ARM.HasVFPD32 = isSet(hwCap, hwcap_VFPD32)
+	ARM.HasLPAE = isSet(hwCap, hwcap_LPAE)
+	ARM.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM)
+	ARM.HasAES = isSet(hwCap2, hwcap2_AES)
+	ARM.HasPMULL = isSet(hwCap2, hwcap2_PMULL)
+	ARM.HasSHA1 = isSet(hwCap2, hwcap2_SHA1)
+	ARM.HasSHA2 = isSet(hwCap2, hwcap2_SHA2)
+	ARM.HasCRC32 = isSet(hwCap2, hwcap2_CRC32)
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
new file mode 100644
index 0000000..08f35ea
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
@@ -0,0 +1,121 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"strings"
+	"syscall"
+)
+
+// HWCAP/HWCAP2 bits. These are exposed by Linux.
+const (
+	hwcap_FP       = 1 << 0
+	hwcap_ASIMD    = 1 << 1
+	hwcap_EVTSTRM  = 1 << 2
+	hwcap_AES      = 1 << 3
+	hwcap_PMULL    = 1 << 4
+	hwcap_SHA1     = 1 << 5
+	hwcap_SHA2     = 1 << 6
+	hwcap_CRC32    = 1 << 7
+	hwcap_ATOMICS  = 1 << 8
+	hwcap_FPHP     = 1 << 9
+	hwcap_ASIMDHP  = 1 << 10
+	hwcap_CPUID    = 1 << 11
+	hwcap_ASIMDRDM = 1 << 12
+	hwcap_JSCVT    = 1 << 13
+	hwcap_FCMA     = 1 << 14
+	hwcap_LRCPC    = 1 << 15
+	hwcap_DCPOP    = 1 << 16
+	hwcap_SHA3     = 1 << 17
+	hwcap_SM3      = 1 << 18
+	hwcap_SM4      = 1 << 19
+	hwcap_ASIMDDP  = 1 << 20
+	hwcap_SHA512   = 1 << 21
+	hwcap_SVE      = 1 << 22
+	hwcap_ASIMDFHM = 1 << 23
+	hwcap_DIT      = 1 << 24
+
+	hwcap2_SVE2 = 1 << 1
+	hwcap2_I8MM = 1 << 13
+)
+
+// linuxKernelCanEmulateCPUID reports whether we're running
+// on Linux 4.11+. Ideally we'd like to ask the question about
+// whether the current kernel contains
+// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=77c97b4ee21290f5f083173d957843b615abbff2
+// but the version number will have to do.
+func linuxKernelCanEmulateCPUID() bool {
+	var un syscall.Utsname
+	syscall.Uname(&un)
+	var sb strings.Builder
+	for _, b := range un.Release[:] {
+		if b == 0 {
+			break
+		}
+		sb.WriteByte(byte(b))
+	}
+	major, minor, _, ok := parseRelease(sb.String())
+	return ok && (major > 4 || major == 4 && minor >= 11)
+}
+
+func doinit() {
+	if err := readHWCAP(); err != nil {
+		// We failed to read /proc/self/auxv. This can happen if the binary has
+		// been given extra capabilities(7) with /bin/setcap.
+		//
+		// When this happens, we have two options. If the Linux kernel is new
+		// enough (4.11+), we can read the arm64 registers directly which'll
+		// trap into the kernel and then return back to userspace.
+		//
+		// But on older kernels, such as Linux 4.4.180 as used on many Synology
+		// devices, calling readARM64Registers (specifically getisar0) will
+		// cause a SIGILL and we'll die. So for older kernels, parse /proc/cpuinfo
+		// instead.
+		//
+		// See golang/go#57336.
+		if linuxKernelCanEmulateCPUID() {
+			readARM64Registers()
+		} else {
+			readLinuxProcCPUInfo()
+		}
+		return
+	}
+
+	// HWCAP feature bits
+	ARM64.HasFP = isSet(hwCap, hwcap_FP)
+	ARM64.HasASIMD = isSet(hwCap, hwcap_ASIMD)
+	ARM64.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM)
+	ARM64.HasAES = isSet(hwCap, hwcap_AES)
+	ARM64.HasPMULL = isSet(hwCap, hwcap_PMULL)
+	ARM64.HasSHA1 = isSet(hwCap, hwcap_SHA1)
+	ARM64.HasSHA2 = isSet(hwCap, hwcap_SHA2)
+	ARM64.HasCRC32 = isSet(hwCap, hwcap_CRC32)
+	ARM64.HasATOMICS = isSet(hwCap, hwcap_ATOMICS)
+	ARM64.HasFPHP = isSet(hwCap, hwcap_FPHP)
+	ARM64.HasASIMDHP = isSet(hwCap, hwcap_ASIMDHP)
+	ARM64.HasCPUID = isSet(hwCap, hwcap_CPUID)
+	ARM64.HasASIMDRDM = isSet(hwCap, hwcap_ASIMDRDM)
+	ARM64.HasJSCVT = isSet(hwCap, hwcap_JSCVT)
+	ARM64.HasFCMA = isSet(hwCap, hwcap_FCMA)
+	ARM64.HasLRCPC = isSet(hwCap, hwcap_LRCPC)
+	ARM64.HasDCPOP = isSet(hwCap, hwcap_DCPOP)
+	ARM64.HasSHA3 = isSet(hwCap, hwcap_SHA3)
+	ARM64.HasSM3 = isSet(hwCap, hwcap_SM3)
+	ARM64.HasSM4 = isSet(hwCap, hwcap_SM4)
+	ARM64.HasASIMDDP = isSet(hwCap, hwcap_ASIMDDP)
+	ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512)
+	ARM64.HasSVE = isSet(hwCap, hwcap_SVE)
+	ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM)
+	ARM64.HasDIT = isSet(hwCap, hwcap_DIT)
+
+
+	// HWCAP2 feature bits
+	ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2)
+	ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM)
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
new file mode 100644
index 0000000..4686c1d
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
@@ -0,0 +1,22 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (mips64 || mips64le)
+
+package cpu
+
+// HWCAP bits. These are exposed by the Linux kernel 5.4.
+const (
+	// CPU features
+	hwcap_MIPS_MSA = 1 << 1
+)
+
+func doinit() {
+	// HWCAP feature bits
+	MIPS64X.HasMSA = isSet(hwCap, hwcap_MIPS_MSA)
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
new file mode 100644
index 0000000..7d902b6
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
@@ -0,0 +1,9 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64
+
+package cpu
+
+func doinit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
new file mode 100644
index 0000000..197188e
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
@@ -0,0 +1,30 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (ppc64 || ppc64le)
+
+package cpu
+
+// HWCAP/HWCAP2 bits. These are exposed by the kernel.
+const (
+	// ISA Level
+	_PPC_FEATURE2_ARCH_2_07 = 0x80000000
+	_PPC_FEATURE2_ARCH_3_00 = 0x00800000
+
+	// CPU features
+	_PPC_FEATURE2_DARN = 0x00200000
+	_PPC_FEATURE2_SCV  = 0x00100000
+)
+
+func doinit() {
+	// HWCAP2 feature bits
+	PPC64.IsPOWER8 = isSet(hwCap2, _PPC_FEATURE2_ARCH_2_07)
+	PPC64.IsPOWER9 = isSet(hwCap2, _PPC_FEATURE2_ARCH_3_00)
+	PPC64.HasDARN = isSet(hwCap2, _PPC_FEATURE2_DARN)
+	PPC64.HasSCV = isSet(hwCap2, _PPC_FEATURE2_SCV)
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
new file mode 100644
index 0000000..cb4a0c5
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
@@ -0,0 +1,137 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe
+// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available.
+//
+// A note on detection of the Vector extension using HWCAP.
+//
+// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5.
+// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe
+// syscall is not available then neither is the Vector extension (which needs kernel support).
+// The riscv_hwprobe syscall should then be all we need to detect the Vector extension.
+// However, some RISC-V board manufacturers ship boards with an older kernel on top of which
+// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe
+// patches. These kernels advertise support for the Vector extension using HWCAP. Falling
+// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not
+// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option.
+//
+// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by
+// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board
+// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified
+// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use
+// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector
+// extension are binary incompatible. HWCAP can then not be used in isolation to populate the
+// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0.
+//
+// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector
+// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype
+// register. This check would allow us to safely detect version 1.0 of the Vector extension
+// with HWCAP, if riscv_hwprobe were not available. However, the check cannot
+// be added until the assembler supports the Vector instructions.
+//
+// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the
+// extensions it advertises support for are explicitly versioned. It's also worth noting that
+// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba.
+// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority
+// of RISC-V extensions.
+//
+// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information.
+
+// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must
+// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall
+// here.
+
+const (
+	// Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
+	riscv_HWPROBE_KEY_IMA_EXT_0   = 0x4
+	riscv_HWPROBE_IMA_C           = 0x2
+	riscv_HWPROBE_IMA_V           = 0x4
+	riscv_HWPROBE_EXT_ZBA         = 0x8
+	riscv_HWPROBE_EXT_ZBB         = 0x10
+	riscv_HWPROBE_EXT_ZBS         = 0x20
+	riscv_HWPROBE_KEY_CPUPERF_0   = 0x5
+	riscv_HWPROBE_MISALIGNED_FAST = 0x3
+	riscv_HWPROBE_MISALIGNED_MASK = 0x7
+)
+
+const (
+	// sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go.
+	sys_RISCV_HWPROBE = 258
+)
+
+// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
+type riscvHWProbePairs struct {
+	key   int64
+	value uint64
+}
+
+const (
+	// CPU features
+	hwcap_RISCV_ISA_C = 1 << ('C' - 'A')
+)
+
+func doinit() {
+	// A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key
+	// field should be initialised with one of the key constants defined above, e.g.,
+	// RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value.
+	// If the kernel does not recognise a key it will set the key field to -1 and the value field to 0.
+
+	pairs := []riscvHWProbePairs{
+		{riscv_HWPROBE_KEY_IMA_EXT_0, 0},
+		{riscv_HWPROBE_KEY_CPUPERF_0, 0},
+	}
+
+	// This call only indicates that extensions are supported if they are implemented on all cores.
+	if riscvHWProbe(pairs, 0) {
+		if pairs[0].key != -1 {
+			v := uint(pairs[0].value)
+			RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C)
+			RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V)
+			RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA)
+			RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB)
+			RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS)
+		}
+		if pairs[1].key != -1 {
+			v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK
+			RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST
+		}
+	}
+
+	// Let's double check with HWCAP if the C extension does not appear to be supported.
+	// This may happen if we're running on a kernel older than 6.4.
+
+	if !RISCV64.HasC {
+		RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C)
+	}
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
+
+// riscvHWProbe is a simplified version of the generated wrapper function found in
+// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the
+// cpuCount and cpus parameters which we do not need. We always want to pass 0 for
+// these parameters here so the kernel only reports the extensions that are present
+// on all cores.
+func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool {
+	var _zero uintptr
+	var p0 unsafe.Pointer
+	if len(pairs) > 0 {
+		p0 = unsafe.Pointer(&pairs[0])
+	} else {
+		p0 = unsafe.Pointer(&_zero)
+	}
+
+	_, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0)
+	return e1 == 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go
new file mode 100644
index 0000000..1517ac6
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go
@@ -0,0 +1,40 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const (
+	// bit mask values from /usr/include/bits/hwcap.h
+	hwcap_ZARCH  = 2
+	hwcap_STFLE  = 4
+	hwcap_MSA    = 8
+	hwcap_LDISP  = 16
+	hwcap_EIMM   = 32
+	hwcap_DFP    = 64
+	hwcap_ETF3EH = 256
+	hwcap_VX     = 2048
+	hwcap_VXE    = 8192
+)
+
+func initS390Xbase() {
+	// test HWCAP bit vector
+	has := func(featureMask uint) bool {
+		return hwCap&featureMask == featureMask
+	}
+
+	// mandatory
+	S390X.HasZARCH = has(hwcap_ZARCH)
+
+	// optional
+	S390X.HasSTFLE = has(hwcap_STFLE)
+	S390X.HasLDISP = has(hwcap_LDISP)
+	S390X.HasEIMM = has(hwcap_EIMM)
+	S390X.HasETF3EH = has(hwcap_ETF3EH)
+	S390X.HasDFP = has(hwcap_DFP)
+	S390X.HasMSA = has(hwcap_MSA)
+	S390X.HasVX = has(hwcap_VX)
+	if S390X.HasVX {
+		S390X.HasVXE = has(hwcap_VXE)
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_loong64.go b/vendor/golang.org/x/sys/cpu/cpu_loong64.go
new file mode 100644
index 0000000..5586358
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_loong64.go
@@ -0,0 +1,12 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build loong64
+
+package cpu
+
+const cacheLineSize = 64
+
+func initOptions() {
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
new file mode 100644
index 0000000..fedb00c
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
@@ -0,0 +1,15 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+
+package cpu
+
+const cacheLineSize = 32
+
+func initOptions() {
+	options = []option{
+		{Name: "msa", Feature: &MIPS64X.HasMSA},
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
new file mode 100644
index 0000000..ffb4ec7
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
@@ -0,0 +1,11 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+
+package cpu
+
+const cacheLineSize = 32
+
+func initOptions() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go
new file mode 100644
index 0000000..ebfb3fc
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go
@@ -0,0 +1,173 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// Minimal copy of functionality from x/sys/unix so the cpu package can call
+// sysctl without depending on x/sys/unix.
+
+const (
+	_CTL_QUERY = -2
+
+	_SYSCTL_VERS_1 = 0x1000000
+)
+
+var _zero uintptr
+
+func sysctl(mib []int32, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) {
+	var _p0 unsafe.Pointer
+	if len(mib) > 0 {
+		_p0 = unsafe.Pointer(&mib[0])
+	} else {
+		_p0 = unsafe.Pointer(&_zero)
+	}
+	_, _, errno := syscall.Syscall6(
+		syscall.SYS___SYSCTL,
+		uintptr(_p0),
+		uintptr(len(mib)),
+		uintptr(unsafe.Pointer(old)),
+		uintptr(unsafe.Pointer(oldlen)),
+		uintptr(unsafe.Pointer(new)),
+		uintptr(newlen))
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+type sysctlNode struct {
+	Flags          uint32
+	Num            int32
+	Name           [32]int8
+	Ver            uint32
+	__rsvd         uint32
+	Un             [16]byte
+	_sysctl_size   [8]byte
+	_sysctl_func   [8]byte
+	_sysctl_parent [8]byte
+	_sysctl_desc   [8]byte
+}
+
+func sysctlNodes(mib []int32) ([]sysctlNode, error) {
+	var olen uintptr
+
+	// Get a list of all sysctl nodes below the given MIB by performing
+	// a sysctl for the given MIB with CTL_QUERY appended.
+	mib = append(mib, _CTL_QUERY)
+	qnode := sysctlNode{Flags: _SYSCTL_VERS_1}
+	qp := (*byte)(unsafe.Pointer(&qnode))
+	sz := unsafe.Sizeof(qnode)
+	if err := sysctl(mib, nil, &olen, qp, sz); err != nil {
+		return nil, err
+	}
+
+	// Now that we know the size, get the actual nodes.
+	nodes := make([]sysctlNode, olen/sz)
+	np := (*byte)(unsafe.Pointer(&nodes[0]))
+	if err := sysctl(mib, np, &olen, qp, sz); err != nil {
+		return nil, err
+	}
+
+	return nodes, nil
+}
+
+func nametomib(name string) ([]int32, error) {
+	// Split name into components.
+	var parts []string
+	last := 0
+	for i := 0; i < len(name); i++ {
+		if name[i] == '.' {
+			parts = append(parts, name[last:i])
+			last = i + 1
+		}
+	}
+	parts = append(parts, name[last:])
+
+	mib := []int32{}
+	// Discover the nodes and construct the MIB OID.
+	for partno, part := range parts {
+		nodes, err := sysctlNodes(mib)
+		if err != nil {
+			return nil, err
+		}
+		for _, node := range nodes {
+			n := make([]byte, 0)
+			for i := range node.Name {
+				if node.Name[i] != 0 {
+					n = append(n, byte(node.Name[i]))
+				}
+			}
+			if string(n) == part {
+				mib = append(mib, int32(node.Num))
+				break
+			}
+		}
+		if len(mib) != partno+1 {
+			return nil, err
+		}
+	}
+
+	return mib, nil
+}
+
+// aarch64SysctlCPUID is struct aarch64_sysctl_cpu_id from NetBSD's <aarch64/armreg.h>
+type aarch64SysctlCPUID struct {
+	midr      uint64 /* Main ID Register */
+	revidr    uint64 /* Revision ID Register */
+	mpidr     uint64 /* Multiprocessor Affinity Register */
+	aa64dfr0  uint64 /* A64 Debug Feature Register 0 */
+	aa64dfr1  uint64 /* A64 Debug Feature Register 1 */
+	aa64isar0 uint64 /* A64 Instruction Set Attribute Register 0 */
+	aa64isar1 uint64 /* A64 Instruction Set Attribute Register 1 */
+	aa64mmfr0 uint64 /* A64 Memory Model Feature Register 0 */
+	aa64mmfr1 uint64 /* A64 Memory Model Feature Register 1 */
+	aa64mmfr2 uint64 /* A64 Memory Model Feature Register 2 */
+	aa64pfr0  uint64 /* A64 Processor Feature Register 0 */
+	aa64pfr1  uint64 /* A64 Processor Feature Register 1 */
+	aa64zfr0  uint64 /* A64 SVE Feature ID Register 0 */
+	mvfr0     uint32 /* Media and VFP Feature Register 0 */
+	mvfr1     uint32 /* Media and VFP Feature Register 1 */
+	mvfr2     uint32 /* Media and VFP Feature Register 2 */
+	pad       uint32
+	clidr     uint64 /* Cache Level ID Register */
+	ctr       uint64 /* Cache Type Register */
+}
+
+func sysctlCPUID(name string) (*aarch64SysctlCPUID, error) {
+	mib, err := nametomib(name)
+	if err != nil {
+		return nil, err
+	}
+
+	out := aarch64SysctlCPUID{}
+	n := unsafe.Sizeof(out)
+	_, _, errno := syscall.Syscall6(
+		syscall.SYS___SYSCTL,
+		uintptr(unsafe.Pointer(&mib[0])),
+		uintptr(len(mib)),
+		uintptr(unsafe.Pointer(&out)),
+		uintptr(unsafe.Pointer(&n)),
+		uintptr(0),
+		uintptr(0))
+	if errno != 0 {
+		return nil, errno
+	}
+	return &out, nil
+}
+
+func doinit() {
+	cpuid, err := sysctlCPUID("machdep.cpu0.cpu_id")
+	if err != nil {
+		setMinimalFeatures()
+		return
+	}
+	parseARM64SystemRegisters(cpuid.aa64isar0, cpuid.aa64isar1, cpuid.aa64pfr0)
+
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.go
new file mode 100644
index 0000000..85b64d5
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.go
@@ -0,0 +1,65 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// Minimal copy of functionality from x/sys/unix so the cpu package can call
+// sysctl without depending on x/sys/unix.
+
+const (
+	// From OpenBSD's sys/sysctl.h.
+	_CTL_MACHDEP = 7
+
+	// From OpenBSD's machine/cpu.h.
+	_CPU_ID_AA64ISAR0 = 2
+	_CPU_ID_AA64ISAR1 = 3
+)
+
+// Implemented in the runtime package (runtime/sys_openbsd3.go)
+func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+
+//go:linkname syscall_syscall6 syscall.syscall6
+
+func sysctl(mib []uint32, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) {
+	_, _, errno := syscall_syscall6(libc_sysctl_trampoline_addr, uintptr(unsafe.Pointer(&mib[0])), uintptr(len(mib)), uintptr(unsafe.Pointer(old)), uintptr(unsafe.Pointer(oldlen)), uintptr(unsafe.Pointer(new)), uintptr(newlen))
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+var libc_sysctl_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_sysctl sysctl "libc.so"
+
+func sysctlUint64(mib []uint32) (uint64, bool) {
+	var out uint64
+	nout := unsafe.Sizeof(out)
+	if err := sysctl(mib, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); err != nil {
+		return 0, false
+	}
+	return out, true
+}
+
+func doinit() {
+	setMinimalFeatures()
+
+	// Get ID_AA64ISAR0 and ID_AA64ISAR1 from sysctl.
+	isar0, ok := sysctlUint64([]uint32{_CTL_MACHDEP, _CPU_ID_AA64ISAR0})
+	if !ok {
+		return
+	}
+	isar1, ok := sysctlUint64([]uint32{_CTL_MACHDEP, _CPU_ID_AA64ISAR1})
+	if !ok {
+		return
+	}
+	parseARM64SystemRegisters(isar0, isar1, 0)
+
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.s b/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.s
new file mode 100644
index 0000000..054ba05
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_openbsd_arm64.s
@@ -0,0 +1,11 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0
+	JMP	libc_sysctl(SB)
+
+GLOBL	·libc_sysctl_trampoline_addr(SB), RODATA, $8
+DATA	·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_arm.go b/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
new file mode 100644
index 0000000..e9ecf2a
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
@@ -0,0 +1,9 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux && arm
+
+package cpu
+
+func archInit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
new file mode 100644
index 0000000..5341e7f
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
@@ -0,0 +1,9 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux && !netbsd && !openbsd && arm64
+
+package cpu
+
+func doinit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
new file mode 100644
index 0000000..5f8f241
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
@@ -0,0 +1,11 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux && (mips64 || mips64le)
+
+package cpu
+
+func archInit() {
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_other_ppc64x.go
new file mode 100644
index 0000000..89608fb
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_ppc64x.go
@@ -0,0 +1,12 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !aix && !linux && (ppc64 || ppc64le)
+
+package cpu
+
+func archInit() {
+	PPC64.IsPOWER8 = true
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
new file mode 100644
index 0000000..5ab8780
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
@@ -0,0 +1,11 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux && riscv64
+
+package cpu
+
+func archInit() {
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
new file mode 100644
index 0000000..c14f12b
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
@@ -0,0 +1,16 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+
+package cpu
+
+const cacheLineSize = 128
+
+func initOptions() {
+	options = []option{
+		{Name: "darn", Feature: &PPC64.HasDARN},
+		{Name: "scv", Feature: &PPC64.HasSCV},
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
new file mode 100644
index 0000000..aca3199
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
@@ -0,0 +1,20 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build riscv64
+
+package cpu
+
+const cacheLineSize = 64
+
+func initOptions() {
+	options = []option{
+		{Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned},
+		{Name: "c", Feature: &RISCV64.HasC},
+		{Name: "v", Feature: &RISCV64.HasV},
+		{Name: "zba", Feature: &RISCV64.HasZba},
+		{Name: "zbb", Feature: &RISCV64.HasZbb},
+		{Name: "zbs", Feature: &RISCV64.HasZbs},
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_s390x.go
new file mode 100644
index 0000000..5881b88
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_s390x.go
@@ -0,0 +1,172 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const cacheLineSize = 256
+
+func initOptions() {
+	options = []option{
+		{Name: "zarch", Feature: &S390X.HasZARCH, Required: true},
+		{Name: "stfle", Feature: &S390X.HasSTFLE, Required: true},
+		{Name: "ldisp", Feature: &S390X.HasLDISP, Required: true},
+		{Name: "eimm", Feature: &S390X.HasEIMM, Required: true},
+		{Name: "dfp", Feature: &S390X.HasDFP},
+		{Name: "etf3eh", Feature: &S390X.HasETF3EH},
+		{Name: "msa", Feature: &S390X.HasMSA},
+		{Name: "aes", Feature: &S390X.HasAES},
+		{Name: "aescbc", Feature: &S390X.HasAESCBC},
+		{Name: "aesctr", Feature: &S390X.HasAESCTR},
+		{Name: "aesgcm", Feature: &S390X.HasAESGCM},
+		{Name: "ghash", Feature: &S390X.HasGHASH},
+		{Name: "sha1", Feature: &S390X.HasSHA1},
+		{Name: "sha256", Feature: &S390X.HasSHA256},
+		{Name: "sha3", Feature: &S390X.HasSHA3},
+		{Name: "sha512", Feature: &S390X.HasSHA512},
+		{Name: "vx", Feature: &S390X.HasVX},
+		{Name: "vxe", Feature: &S390X.HasVXE},
+	}
+}
+
+// bitIsSet reports whether the bit at index is set. The bit index
+// is in big endian order, so bit index 0 is the leftmost bit.
+func bitIsSet(bits []uint64, index uint) bool {
+	return bits[index/64]&((1<<63)>>(index%64)) != 0
+}
+
+// facility is a bit index for the named facility.
+type facility uint8
+
+const (
+	// mandatory facilities
+	zarch  facility = 1  // z architecture mode is active
+	stflef facility = 7  // store-facility-list-extended
+	ldisp  facility = 18 // long-displacement
+	eimm   facility = 21 // extended-immediate
+
+	// miscellaneous facilities
+	dfp    facility = 42 // decimal-floating-point
+	etf3eh facility = 30 // extended-translation 3 enhancement
+
+	// cryptography facilities
+	msa  facility = 17  // message-security-assist
+	msa3 facility = 76  // message-security-assist extension 3
+	msa4 facility = 77  // message-security-assist extension 4
+	msa5 facility = 57  // message-security-assist extension 5
+	msa8 facility = 146 // message-security-assist extension 8
+	msa9 facility = 155 // message-security-assist extension 9
+
+	// vector facilities
+	vx   facility = 129 // vector facility
+	vxe  facility = 135 // vector-enhancements 1
+	vxe2 facility = 148 // vector-enhancements 2
+)
+
+// facilityList contains the result of an STFLE call.
+// Bits are numbered in big endian order so the
+// leftmost bit (the MSB) is at index 0.
+type facilityList struct {
+	bits [4]uint64
+}
+
+// Has reports whether the given facilities are present.
+func (s *facilityList) Has(fs ...facility) bool {
+	if len(fs) == 0 {
+		panic("no facility bits provided")
+	}
+	for _, f := range fs {
+		if !bitIsSet(s.bits[:], uint(f)) {
+			return false
+		}
+	}
+	return true
+}
+
+// function is the code for the named cryptographic function.
+type function uint8
+
+const (
+	// KM{,A,C,CTR} function codes
+	aes128 function = 18 // AES-128
+	aes192 function = 19 // AES-192
+	aes256 function = 20 // AES-256
+
+	// K{I,L}MD function codes
+	sha1     function = 1  // SHA-1
+	sha256   function = 2  // SHA-256
+	sha512   function = 3  // SHA-512
+	sha3_224 function = 32 // SHA3-224
+	sha3_256 function = 33 // SHA3-256
+	sha3_384 function = 34 // SHA3-384
+	sha3_512 function = 35 // SHA3-512
+	shake128 function = 36 // SHAKE-128
+	shake256 function = 37 // SHAKE-256
+
+	// KLMD function codes
+	ghash function = 65 // GHASH
+)
+
+// queryResult contains the result of a Query function
+// call. Bits are numbered in big endian order so the
+// leftmost bit (the MSB) is at index 0.
+type queryResult struct {
+	bits [2]uint64
+}
+
+// Has reports whether the given functions are present.
+func (q *queryResult) Has(fns ...function) bool {
+	if len(fns) == 0 {
+		panic("no function codes provided")
+	}
+	for _, f := range fns {
+		if !bitIsSet(q.bits[:], uint(f)) {
+			return false
+		}
+	}
+	return true
+}
+
+func doinit() {
+	initS390Xbase()
+
+	// We need implementations of stfle, km and so on
+	// to detect cryptographic features.
+	if !haveAsmFunctions() {
+		return
+	}
+
+	// optional cryptographic functions
+	if S390X.HasMSA {
+		aes := []function{aes128, aes192, aes256}
+
+		// cipher message
+		km, kmc := kmQuery(), kmcQuery()
+		S390X.HasAES = km.Has(aes...)
+		S390X.HasAESCBC = kmc.Has(aes...)
+		if S390X.HasSTFLE {
+			facilities := stfle()
+			if facilities.Has(msa4) {
+				kmctr := kmctrQuery()
+				S390X.HasAESCTR = kmctr.Has(aes...)
+			}
+			if facilities.Has(msa8) {
+				kma := kmaQuery()
+				S390X.HasAESGCM = kma.Has(aes...)
+			}
+		}
+
+		// compute message digest
+		kimd := kimdQuery() // intermediate (no padding)
+		klmd := klmdQuery() // last (padding)
+		S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1)
+		S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256)
+		S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512)
+		S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist
+		sha3 := []function{
+			sha3_224, sha3_256, sha3_384, sha3_512,
+			shake128, shake256,
+		}
+		S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...)
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_s390x.s b/vendor/golang.org/x/sys/cpu/cpu_s390x.s
new file mode 100644
index 0000000..1fb4b70
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_s390x.s
@@ -0,0 +1,57 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+
+#include "textflag.h"
+
+// func stfle() facilityList
+TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32
+	MOVD $ret+0(FP), R1
+	MOVD $3, R0          // last doubleword index to store
+	XC   $32, (R1), (R1) // clear 4 doublewords (32 bytes)
+	WORD $0xb2b01000     // store facility list extended (STFLE)
+	RET
+
+// func kmQuery() queryResult
+TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KM-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92E0024    // cipher message (KM)
+	RET
+
+// func kmcQuery() queryResult
+TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMC-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92F0024    // cipher message with chaining (KMC)
+	RET
+
+// func kmctrQuery() queryResult
+TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMCTR-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92D4024    // cipher message with counter (KMCTR)
+	RET
+
+// func kmaQuery() queryResult
+TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMA-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xb9296024    // cipher message with authentication (KMA)
+	RET
+
+// func kimdQuery() queryResult
+TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KIMD-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB93E0024    // compute intermediate message digest (KIMD)
+	RET
+
+// func klmdQuery() queryResult
+TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KLMD-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB93F0024    // compute last message digest (KLMD)
+	RET
diff --git a/vendor/golang.org/x/sys/cpu/cpu_wasm.go b/vendor/golang.org/x/sys/cpu/cpu_wasm.go
new file mode 100644
index 0000000..384787e
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_wasm.go
@@ -0,0 +1,17 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build wasm
+
+package cpu
+
+// We're compiling the cpu package for an unknown (software-abstracted) CPU.
+// Make CacheLinePad an empty struct and hope that the usual struct alignment
+// rules are good enough.
+
+const cacheLineSize = 0
+
+func initOptions() {}
+
+func archInit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go
new file mode 100644
index 0000000..c29f5e4
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go
@@ -0,0 +1,151 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64 || amd64p32
+
+package cpu
+
+import "runtime"
+
+const cacheLineSize = 64
+
+func initOptions() {
+	options = []option{
+		{Name: "adx", Feature: &X86.HasADX},
+		{Name: "aes", Feature: &X86.HasAES},
+		{Name: "avx", Feature: &X86.HasAVX},
+		{Name: "avx2", Feature: &X86.HasAVX2},
+		{Name: "avx512", Feature: &X86.HasAVX512},
+		{Name: "avx512f", Feature: &X86.HasAVX512F},
+		{Name: "avx512cd", Feature: &X86.HasAVX512CD},
+		{Name: "avx512er", Feature: &X86.HasAVX512ER},
+		{Name: "avx512pf", Feature: &X86.HasAVX512PF},
+		{Name: "avx512vl", Feature: &X86.HasAVX512VL},
+		{Name: "avx512bw", Feature: &X86.HasAVX512BW},
+		{Name: "avx512dq", Feature: &X86.HasAVX512DQ},
+		{Name: "avx512ifma", Feature: &X86.HasAVX512IFMA},
+		{Name: "avx512vbmi", Feature: &X86.HasAVX512VBMI},
+		{Name: "avx512vnniw", Feature: &X86.HasAVX5124VNNIW},
+		{Name: "avx5124fmaps", Feature: &X86.HasAVX5124FMAPS},
+		{Name: "avx512vpopcntdq", Feature: &X86.HasAVX512VPOPCNTDQ},
+		{Name: "avx512vpclmulqdq", Feature: &X86.HasAVX512VPCLMULQDQ},
+		{Name: "avx512vnni", Feature: &X86.HasAVX512VNNI},
+		{Name: "avx512gfni", Feature: &X86.HasAVX512GFNI},
+		{Name: "avx512vaes", Feature: &X86.HasAVX512VAES},
+		{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
+		{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
+		{Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
+		{Name: "amxtile", Feature: &X86.HasAMXTile},
+		{Name: "amxint8", Feature: &X86.HasAMXInt8},
+		{Name: "amxbf16", Feature: &X86.HasAMXBF16},
+		{Name: "bmi1", Feature: &X86.HasBMI1},
+		{Name: "bmi2", Feature: &X86.HasBMI2},
+		{Name: "cx16", Feature: &X86.HasCX16},
+		{Name: "erms", Feature: &X86.HasERMS},
+		{Name: "fma", Feature: &X86.HasFMA},
+		{Name: "osxsave", Feature: &X86.HasOSXSAVE},
+		{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
+		{Name: "popcnt", Feature: &X86.HasPOPCNT},
+		{Name: "rdrand", Feature: &X86.HasRDRAND},
+		{Name: "rdseed", Feature: &X86.HasRDSEED},
+		{Name: "sse3", Feature: &X86.HasSSE3},
+		{Name: "sse41", Feature: &X86.HasSSE41},
+		{Name: "sse42", Feature: &X86.HasSSE42},
+		{Name: "ssse3", Feature: &X86.HasSSSE3},
+
+		// These capabilities should always be enabled on amd64:
+		{Name: "sse2", Feature: &X86.HasSSE2, Required: runtime.GOARCH == "amd64"},
+	}
+}
+
+func archInit() {
+
+	Initialized = true
+
+	maxID, _, _, _ := cpuid(0, 0)
+
+	if maxID < 1 {
+		return
+	}
+
+	_, _, ecx1, edx1 := cpuid(1, 0)
+	X86.HasSSE2 = isSet(26, edx1)
+
+	X86.HasSSE3 = isSet(0, ecx1)
+	X86.HasPCLMULQDQ = isSet(1, ecx1)
+	X86.HasSSSE3 = isSet(9, ecx1)
+	X86.HasFMA = isSet(12, ecx1)
+	X86.HasCX16 = isSet(13, ecx1)
+	X86.HasSSE41 = isSet(19, ecx1)
+	X86.HasSSE42 = isSet(20, ecx1)
+	X86.HasPOPCNT = isSet(23, ecx1)
+	X86.HasAES = isSet(25, ecx1)
+	X86.HasOSXSAVE = isSet(27, ecx1)
+	X86.HasRDRAND = isSet(30, ecx1)
+
+	var osSupportsAVX, osSupportsAVX512 bool
+	// For XGETBV, OSXSAVE bit is required and sufficient.
+	if X86.HasOSXSAVE {
+		eax, _ := xgetbv()
+		// Check if XMM and YMM registers have OS support.
+		osSupportsAVX = isSet(1, eax) && isSet(2, eax)
+
+		if runtime.GOOS == "darwin" {
+			// Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers.
+			// Since users can't rely on mask register contents, let's not advertise AVX-512 support.
+			// See issue 49233.
+			osSupportsAVX512 = false
+		} else {
+			// Check if OPMASK and ZMM registers have OS support.
+			osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax)
+		}
+	}
+
+	X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
+
+	if maxID < 7 {
+		return
+	}
+
+	_, ebx7, ecx7, edx7 := cpuid(7, 0)
+	X86.HasBMI1 = isSet(3, ebx7)
+	X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
+	X86.HasBMI2 = isSet(8, ebx7)
+	X86.HasERMS = isSet(9, ebx7)
+	X86.HasRDSEED = isSet(18, ebx7)
+	X86.HasADX = isSet(19, ebx7)
+
+	X86.HasAVX512 = isSet(16, ebx7) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
+	if X86.HasAVX512 {
+		X86.HasAVX512F = true
+		X86.HasAVX512CD = isSet(28, ebx7)
+		X86.HasAVX512ER = isSet(27, ebx7)
+		X86.HasAVX512PF = isSet(26, ebx7)
+		X86.HasAVX512VL = isSet(31, ebx7)
+		X86.HasAVX512BW = isSet(30, ebx7)
+		X86.HasAVX512DQ = isSet(17, ebx7)
+		X86.HasAVX512IFMA = isSet(21, ebx7)
+		X86.HasAVX512VBMI = isSet(1, ecx7)
+		X86.HasAVX5124VNNIW = isSet(2, edx7)
+		X86.HasAVX5124FMAPS = isSet(3, edx7)
+		X86.HasAVX512VPOPCNTDQ = isSet(14, ecx7)
+		X86.HasAVX512VPCLMULQDQ = isSet(10, ecx7)
+		X86.HasAVX512VNNI = isSet(11, ecx7)
+		X86.HasAVX512GFNI = isSet(8, ecx7)
+		X86.HasAVX512VAES = isSet(9, ecx7)
+		X86.HasAVX512VBMI2 = isSet(6, ecx7)
+		X86.HasAVX512BITALG = isSet(12, ecx7)
+
+		eax71, _, _, _ := cpuid(7, 1)
+		X86.HasAVX512BF16 = isSet(5, eax71)
+	}
+
+	X86.HasAMXTile = isSet(24, edx7)
+	X86.HasAMXInt8 = isSet(25, edx7)
+	X86.HasAMXBF16 = isSet(22, edx7)
+}
+
+func isSet(bitpos uint, value uint32) bool {
+	return value&(1<<bitpos) != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.s b/vendor/golang.org/x/sys/cpu/cpu_x86.s
new file mode 100644
index 0000000..7d7ba33
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.s
@@ -0,0 +1,26 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (386 || amd64 || amd64p32) && gc
+
+#include "textflag.h"
+
+// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·cpuid(SB), NOSPLIT, $0-24
+	MOVL eaxArg+0(FP), AX
+	MOVL ecxArg+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func xgetbv() (eax, edx uint32)
+TEXT ·xgetbv(SB),NOSPLIT,$0-8
+	MOVL $0, CX
+	XGETBV
+	MOVL AX, eax+0(FP)
+	MOVL DX, edx+4(FP)
+	RET
diff --git a/vendor/golang.org/x/sys/cpu/cpu_zos.go b/vendor/golang.org/x/sys/cpu/cpu_zos.go
new file mode 100644
index 0000000..5f54683
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_zos.go
@@ -0,0 +1,10 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+func archInit() {
+	doinit()
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_zos_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_zos_s390x.go
new file mode 100644
index 0000000..ccb1b70
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_zos_s390x.go
@@ -0,0 +1,25 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+func initS390Xbase() {
+	// get the facilities list
+	facilities := stfle()
+
+	// mandatory
+	S390X.HasZARCH = facilities.Has(zarch)
+	S390X.HasSTFLE = facilities.Has(stflef)
+	S390X.HasLDISP = facilities.Has(ldisp)
+	S390X.HasEIMM = facilities.Has(eimm)
+
+	// optional
+	S390X.HasETF3EH = facilities.Has(etf3eh)
+	S390X.HasDFP = facilities.Has(dfp)
+	S390X.HasMSA = facilities.Has(msa)
+	S390X.HasVX = facilities.Has(vx)
+	if S390X.HasVX {
+		S390X.HasVXE = facilities.Has(vxe)
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/endian_big.go b/vendor/golang.org/x/sys/cpu/endian_big.go
new file mode 100644
index 0000000..7fe04b0
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/endian_big.go
@@ -0,0 +1,10 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
+
+package cpu
+
+// IsBigEndian records whether the GOARCH's byte order is big endian.
+const IsBigEndian = true
diff --git a/vendor/golang.org/x/sys/cpu/endian_little.go b/vendor/golang.org/x/sys/cpu/endian_little.go
new file mode 100644
index 0000000..48eccc4
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/endian_little.go
@@ -0,0 +1,10 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
+
+package cpu
+
+// IsBigEndian records whether the GOARCH's byte order is big endian.
+const IsBigEndian = false
diff --git a/vendor/golang.org/x/sys/cpu/hwcap_linux.go b/vendor/golang.org/x/sys/cpu/hwcap_linux.go
new file mode 100644
index 0000000..34e49f9
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/hwcap_linux.go
@@ -0,0 +1,71 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"os"
+)
+
+const (
+	_AT_HWCAP  = 16
+	_AT_HWCAP2 = 26
+
+	procAuxv = "/proc/self/auxv"
+
+	uintSize = int(32 << (^uint(0) >> 63))
+)
+
+// For those platforms don't have a 'cpuid' equivalent we use HWCAP/HWCAP2
+// These are initialized in cpu_$GOARCH.go
+// and should not be changed after they are initialized.
+var hwCap uint
+var hwCap2 uint
+
+func readHWCAP() error {
+	// For Go 1.21+, get auxv from the Go runtime.
+	if a := getAuxv(); len(a) > 0 {
+		for len(a) >= 2 {
+			tag, val := a[0], uint(a[1])
+			a = a[2:]
+			switch tag {
+			case _AT_HWCAP:
+				hwCap = val
+			case _AT_HWCAP2:
+				hwCap2 = val
+			}
+		}
+		return nil
+	}
+
+	buf, err := os.ReadFile(procAuxv)
+	if err != nil {
+		// e.g. on android /proc/self/auxv is not accessible, so silently
+		// ignore the error and leave Initialized = false. On some
+		// architectures (e.g. arm64) doinit() implements a fallback
+		// readout and will set Initialized = true again.
+		return err
+	}
+	bo := hostByteOrder()
+	for len(buf) >= 2*(uintSize/8) {
+		var tag, val uint
+		switch uintSize {
+		case 32:
+			tag = uint(bo.Uint32(buf[0:]))
+			val = uint(bo.Uint32(buf[4:]))
+			buf = buf[8:]
+		case 64:
+			tag = uint(bo.Uint64(buf[0:]))
+			val = uint(bo.Uint64(buf[8:]))
+			buf = buf[16:]
+		}
+		switch tag {
+		case _AT_HWCAP:
+			hwCap = val
+		case _AT_HWCAP2:
+			hwCap2 = val
+		}
+	}
+	return nil
+}
diff --git a/vendor/golang.org/x/sys/cpu/parse.go b/vendor/golang.org/x/sys/cpu/parse.go
new file mode 100644
index 0000000..762b63d
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/parse.go
@@ -0,0 +1,43 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import "strconv"
+
+// parseRelease parses a dot-separated version number. It follows the semver
+// syntax, but allows the minor and patch versions to be elided.
+//
+// This is a copy of the Go runtime's parseRelease from
+// https://golang.org/cl/209597.
+func parseRelease(rel string) (major, minor, patch int, ok bool) {
+	// Strip anything after a dash or plus.
+	for i := 0; i < len(rel); i++ {
+		if rel[i] == '-' || rel[i] == '+' {
+			rel = rel[:i]
+			break
+		}
+	}
+
+	next := func() (int, bool) {
+		for i := 0; i < len(rel); i++ {
+			if rel[i] == '.' {
+				ver, err := strconv.Atoi(rel[:i])
+				rel = rel[i+1:]
+				return ver, err == nil
+			}
+		}
+		ver, err := strconv.Atoi(rel)
+		rel = ""
+		return ver, err == nil
+	}
+	if major, ok = next(); !ok || rel == "" {
+		return
+	}
+	if minor, ok = next(); !ok || rel == "" {
+		return
+	}
+	patch, ok = next()
+	return
+}
diff --git a/vendor/golang.org/x/sys/cpu/proc_cpuinfo_linux.go b/vendor/golang.org/x/sys/cpu/proc_cpuinfo_linux.go
new file mode 100644
index 0000000..4cd64c7
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/proc_cpuinfo_linux.go
@@ -0,0 +1,53 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && arm64
+
+package cpu
+
+import (
+	"errors"
+	"io"
+	"os"
+	"strings"
+)
+
+func readLinuxProcCPUInfo() error {
+	f, err := os.Open("/proc/cpuinfo")
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	var buf [1 << 10]byte // enough for first CPU
+	n, err := io.ReadFull(f, buf[:])
+	if err != nil && err != io.ErrUnexpectedEOF {
+		return err
+	}
+	in := string(buf[:n])
+	const features = "\nFeatures	: "
+	i := strings.Index(in, features)
+	if i == -1 {
+		return errors.New("no CPU features found")
+	}
+	in = in[i+len(features):]
+	if i := strings.Index(in, "\n"); i != -1 {
+		in = in[:i]
+	}
+	m := map[string]*bool{}
+
+	initOptions() // need it early here; it's harmless to call twice
+	for _, o := range options {
+		m[o.Name] = o.Feature
+	}
+	// The EVTSTRM field has alias "evstrm" in Go, but Linux calls it "evtstrm".
+	m["evtstrm"] = &ARM64.HasEVTSTRM
+
+	for _, f := range strings.Fields(in) {
+		if p, ok := m[f]; ok {
+			*p = true
+		}
+	}
+	return nil
+}
diff --git a/vendor/golang.org/x/sys/cpu/runtime_auxv.go b/vendor/golang.org/x/sys/cpu/runtime_auxv.go
new file mode 100644
index 0000000..5f92ac9
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/runtime_auxv.go
@@ -0,0 +1,16 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+// getAuxvFn is non-nil on Go 1.21+ (via runtime_auxv_go121.go init)
+// on platforms that use auxv.
+var getAuxvFn func() []uintptr
+
+func getAuxv() []uintptr {
+	if getAuxvFn == nil {
+		return nil
+	}
+	return getAuxvFn()
+}
diff --git a/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go b/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go
new file mode 100644
index 0000000..4c9788e
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go
@@ -0,0 +1,18 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.21
+
+package cpu
+
+import (
+	_ "unsafe" // for linkname
+)
+
+//go:linkname runtime_getAuxv runtime.getAuxv
+func runtime_getAuxv() []uintptr
+
+func init() {
+	getAuxvFn = runtime_getAuxv
+}
diff --git a/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go b/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
new file mode 100644
index 0000000..1b9ccb0
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
@@ -0,0 +1,26 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Recreate a getsystemcfg syscall handler instead of
+// using the one provided by x/sys/unix to avoid having
+// the dependency between them. (See golang.org/issue/32102)
+// Moreover, this file will be used during the building of
+// gccgo's libgo and thus must not used a CGo method.
+
+//go:build aix && gccgo
+
+package cpu
+
+import (
+	"syscall"
+)
+
+//extern getsystemcfg
+func gccgoGetsystemcfg(label uint32) (r uint64)
+
+func callgetsystemcfg(label int) (r1 uintptr, e1 syscall.Errno) {
+	r1 = uintptr(gccgoGetsystemcfg(uint32(label)))
+	e1 = syscall.GetErrno()
+	return
+}
diff --git a/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go b/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
new file mode 100644
index 0000000..e8b6cdb
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
@@ -0,0 +1,35 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Minimal copy of x/sys/unix so the cpu package can make a
+// system call on AIX without depending on x/sys/unix.
+// (See golang.org/issue/32102)
+
+//go:build aix && ppc64 && gc
+
+package cpu
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+//go:cgo_import_dynamic libc_getsystemcfg getsystemcfg "libc.a/shr_64.o"
+
+//go:linkname libc_getsystemcfg libc_getsystemcfg
+
+type syscallFunc uintptr
+
+var libc_getsystemcfg syscallFunc
+
+type errno = syscall.Errno
+
+// Implemented in runtime/syscall_aix.go.
+func rawSyscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err errno)
+func syscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err errno)
+
+func callgetsystemcfg(label int) (r1 uintptr, e1 errno) {
+	r1, _, e1 = syscall6(uintptr(unsafe.Pointer(&libc_getsystemcfg)), 1, uintptr(label), 0, 0, 0, 0, 0)
+	return
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
new file mode 100644
index 0000000..0ad8814
--- /dev/null
+++ b/vendor/modules.txt
@@ -0,0 +1,18 @@
+# git.autistici.org/ai3/go-common v0.0.0-20241017171051-880a2c5ae7f4
+## explicit; go 1.21.0
+git.autistici.org/ai3/go-common/pwhash
+# github.com/amoghe/go-crypt v0.0.0-20220222110647-20eada5f5964
+## explicit
+github.com/amoghe/go-crypt
+# github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
+## explicit
+github.com/coreos/go-systemd/activation
+# golang.org/x/crypto v0.27.0
+## explicit; go 1.20
+golang.org/x/crypto/argon2
+golang.org/x/crypto/blake2b
+golang.org/x/crypto/pbkdf2
+golang.org/x/crypto/scrypt
+# golang.org/x/sys v0.25.0
+## explicit; go 1.18
+golang.org/x/sys/cpu
-- 
GitLab