diff --git a/Makefile b/Makefile index 0c2443af1..2c89b430a 100644 --- a/Makefile +++ b/Makefile @@ -107,7 +107,7 @@ cyclo: @GO15VENDOREXPERIMENT=1 ${GOPATH}/bin/gocyclo -over 65 pkg build: getdeps verifiers $(UI_ASSETS) isa-l - @GO15VENDOREXPERIMENT=1 go generate ./... + @GO15VENDOREXPERIMENT=1 go generate github.com/minio/minio/pkg/crypto/sha1 deadcode: @GO15VENDOREXPERIMENT=1 ${GOPATH}/bin/deadcode diff --git a/README.md b/README.md index 2111fee07..e0f14c6c7 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,16 @@ Minio is a distributed object storage server written in Golang. Source is available under free software / open source [Apache license 2.0](./LICENSE). API compatible with Amazon S3 cloud storage service. + ## Description Micro services environment provisions one Minio server per application instance. Scalability is achieved through large number of smaller personalized instances. This version of the Minio binary is built using Filesystem storage backend for magnetic and solid state disks. +Minio currently implements two backends + + - Filesystem (FS) - is available and ready for general purpose use. + - ErasureCoded (XL) - is work in progress and not ready for general purpose use. + ## Minio Client [Minio Client (mc)](https://github.com/minio/mc#minio-client-mc-) provides a modern alternative to Unix commands like ``ls``, ``cat``, ``cp``, ``sync``, and ``diff``. It supports POSIX compatible filesystems and Amazon S3 compatible cloud storage systems. It is entirely written in Golang. diff --git a/appveyor.yml b/appveyor.yml index 24fa4a07c..05e07b3f3 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -40,7 +40,7 @@ build_script: - curl -fsSL -o ui-assets.asc https://dl.minio.io/assets/server/ui/%UI_ASSETS_ARMOR% - gpg --batch --no-tty --yes --keyserver pgp.mit.edu --recv-keys F9AAC728 - gpg --batch --no-tty --verify %UI_ASSETS_ARMOR% %UI_ASSETS% - - go generate ./... + - go generate github.com/minio/minio/pkg/crypto/sha1 - go test . - go test -race . 
- go test github.com/minio/minio/pkg... diff --git a/pkg/cpu/cpu_amd64.go b/pkg/cpu/cpu_amd64.go deleted file mode 100644 index dbcecc179..000000000 --- a/pkg/cpu/cpu_amd64.go +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2015 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package cpu - -// cpuid, cpuidex -func cpuid(op uint32) (eax, ebx, ecx, edx uint32) -func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) - -// HasSSE41 - CPUID instruction verification wrapper for SSE41 extensions -func HasSSE41() bool { - _, _, c, _ := cpuid(1) - return ((c & (1 << 19)) != 0) -} - -// HasAVX - CPUID instruction verification wrapper for AVX extensions -func HasAVX() bool { - _, _, c, _ := cpuid(1) - return ((c & (1 << 28)) != 0) -} - -// HasAVX2 - CPUID instruction verification wrapper for AVX2 extensions -func HasAVX2() bool { - _, b, _, _ := cpuidex(7, 0) - return ((b & (1 << 5)) != 0) -} diff --git a/pkg/cpu/cpu_amd64.s b/pkg/cpu/cpu_amd64.s deleted file mode 100644 index 6ead70174..000000000 --- a/pkg/cpu/cpu_amd64.s +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. 
-// -// See https://github.com/klauspost/cpuid/blob/master/LICENSE -// -// Using this inside Minio with modifications -// - -// func cpuid(op uint32) (eax, ebx, ecx, edx uint32) -TEXT ·cpuid(SB),7,$0 - MOVL op+0(FP),AX - CPUID - MOVL AX,eax+8(FP) - MOVL BX,ebx+12(FP) - MOVL CX,ecx+16(FP) - MOVL DX,edx+20(FP) - RET - -// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) -TEXT ·cpuidex(SB),7,$0 - MOVL op+0(FP),AX - MOVL op2+4(FP),CX - CPUID - MOVL AX,eax+8(FP) - MOVL BX,ebx+12(FP) - MOVL CX,ecx+16(FP) - MOVL DX,edx+20(FP) - RET diff --git a/pkg/cpu/cpu_arm.go b/pkg/cpu/cpu_arm.go deleted file mode 100644 index f2063689b..000000000 --- a/pkg/cpu/cpu_arm.go +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2015 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package cpu - -// HasSSE41 - CPUID instruction verification wrapper for SSE41 extensions -func HasSSE41() bool { - return false -} - -// HasAVX - CPUID instruction verification wrapper for AVX extensions -func HasAVX() bool { - return false -} - -// HasAVX2 - CPUID instruction verification wrapper for AVX2 extensions -func HasAVX2() bool { - return false -} diff --git a/pkg/cpu/cpu_test.go b/pkg/cpu/cpu_test.go deleted file mode 100644 index 8286413e9..000000000 --- a/pkg/cpu/cpu_test.go +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2015 Minio, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package cpu_test - -import ( - "errors" - "os/exec" - "runtime" - "strings" - "testing" - - "github.com/minio/minio/pkg/cpu" - . "gopkg.in/check.v1" -) - -func Test(t *testing.T) { TestingT(t) } - -type MySuite struct{} - -var _ = Suite(&MySuite{}) - -func hasCPUFeatureFromOS(feature string) (bool, error) { - if runtime.GOOS == "linux" { - command := exec.Command("/bin/cat", "/proc/cpuinfo") - output, err := command.Output() - if err != nil { - return false, err - } - if strings.Contains(string(output), feature) { - return true, nil - } - return false, nil - } - return false, errors.New("Not Implemented on this platform") -} - -func (s *MySuite) TestHasSSE41(c *C) { - if runtime.GOOS == "linux" { - var flag = cpu.HasSSE41() - osCheck, err := hasCPUFeatureFromOS("sse4_1") - c.Assert(err, IsNil) - c.Check(flag, Equals, osCheck) - } -} - -func (s *MySuite) TestHasAVX(c *C) { - if runtime.GOOS == "linux" { - var flag = cpu.HasAVX() - osFlag, err := hasCPUFeatureFromOS("avx") - c.Assert(err, IsNil) - c.Check(osFlag, Equals, flag) - } -} - -func (s *MySuite) TestHasAVX2(c *C) { - if runtime.GOOS == "linux" { - var flag = cpu.HasAVX2() - osFlag, err := hasCPUFeatureFromOS("avx2") - c.Assert(err, IsNil) - c.Check(osFlag, Equals, flag) - } -} diff --git a/pkg/cpu/doc.go b/pkg/cpu/doc.go deleted file mode 100644 index 899c65d23..000000000 --- a/pkg/cpu/doc.go +++ /dev/null @@ -1,11 +0,0 @@ -// Package cpu 
provides wrapper around assembly functions for checking processor -// instruction capabilities for SSE4.1, AVX, AVX2 support -// -// Example -// -// ``cpu.HasSSE41()`` returns true for SSE4.1 instruction support, false otherwise -// -// ``cpu.HasAVX()`` returns true for AVX instruction support, false otherwise -// -// ``cpu.HasAVX2()`` returns true for AVX2 instruction support, false otherwise -package cpu diff --git a/pkg/crypto/sha1/sha1block.go b/pkg/crypto/sha1/sha1block.go index 5232281d2..fc6f1f61e 100644 --- a/pkg/crypto/sha1/sha1block.go +++ b/pkg/crypto/sha1/sha1block.go @@ -26,12 +26,12 @@ import "C" import ( "unsafe" - "github.com/minio/minio/pkg/cpu" + "github.com/klauspost/cpuid" ) func block(dig *digest, p []byte) { switch true { - case cpu.HasSSE41() == true: + case cpuid.CPU.SSE3(): blockSSE3(dig, p) default: blockGeneric(dig, p) diff --git a/pkg/crypto/sha1/sha1block_linux.go b/pkg/crypto/sha1/sha1block_linux.go index d370d89c3..ab8003c5f 100644 --- a/pkg/crypto/sha1/sha1block_linux.go +++ b/pkg/crypto/sha1/sha1block_linux.go @@ -27,14 +27,14 @@ import "C" import ( "unsafe" - "github.com/minio/minio/pkg/cpu" + "github.com/klauspost/cpuid" ) func block(dig *digest, p []byte) { switch true { - case cpu.HasAVX2(): + case cpuid.CPU.AVX2(): blockAVX2(dig, p) - case cpu.HasSSE41(): + case cpuid.CPU.SSE3(): blockSSE3(dig, p) default: blockGeneric(dig, p) diff --git a/pkg/crypto/sha256/sha256_linux.go b/pkg/crypto/sha256/sha256_linux.go index c38defe74..74f6a49bc 100644 --- a/pkg/crypto/sha256/sha256_linux.go +++ b/pkg/crypto/sha256/sha256_linux.go @@ -31,7 +31,7 @@ package sha256 import ( "hash" - "github.com/minio/minio/pkg/cpu" + "github.com/klauspost/cpuid" ) // Size - The size of a SHA256 checksum in bytes. 
@@ -76,11 +76,11 @@ func (d *digest) Reset() { func block(dig *digest, p []byte) { switch true { - case cpu.HasAVX2() == true: + case cpuid.CPU.AVX2(): blockAVX2(dig, p) - case cpu.HasAVX() == true: + case cpuid.CPU.AVX(): blockAVX(dig, p) - case cpu.HasSSE41() == true: + case cpuid.CPU.SSSE3(): blockSSE(dig, p) default: blockGeneric(dig, p) diff --git a/pkg/crypto/sha512/sha512_linux.go b/pkg/crypto/sha512/sha512_linux.go index 6fdda8d63..d3a8877fe 100644 --- a/pkg/crypto/sha512/sha512_linux.go +++ b/pkg/crypto/sha512/sha512_linux.go @@ -16,7 +16,7 @@ package sha512 import ( "hash" - "github.com/minio/minio/pkg/cpu" + "github.com/klauspost/cpuid" ) // Size - The size of a SHA512 checksum in bytes. @@ -47,11 +47,11 @@ type digest struct { func block(dig *digest, p []byte) { switch true { - case cpu.HasAVX2() == true: + case cpuid.CPU.AVX2(): blockAVX2(dig, p) - case cpu.HasAVX() == true: + case cpuid.CPU.AVX(): blockAVX(dig, p) - case cpu.HasSSE41() == true: + case cpuid.CPU.SSSE3(): blockSSE(dig, p) default: blockGeneric(dig, p) diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/klauspost/cpuid/LICENSE new file mode 100644 index 000000000..5cec7ee94 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Klaus Post + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md new file mode 100644 index 000000000..b2b6bee87 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/README.md @@ -0,0 +1,145 @@ +# cpuid +Package cpuid provides information about the CPU running the current program. + +CPU features are detected on startup, and kept for fast access through the life of the application. +Currently x86 / x64 (AMD64) is supported, and no external C (cgo) code is used, which should make the library very easy to use. + +You can access the CPU information by accessing the shared CPU variable of the cpuid library. 
+ +Package home: https://github.com/klauspost/cpuid + +[![GoDoc][1]][2] [![Build Status][3]][4] + +[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg +[2]: https://godoc.org/github.com/klauspost/cpuid +[3]: https://travis-ci.org/klauspost/cpuid.svg +[4]: https://travis-ci.org/klauspost/cpuid + +# features +## CPU Instructions +* **CMOV** (i686 CMOV) +* **NX** (NX (No-Execute) bit) +* **AMD3DNOW** (AMD 3DNOW) +* **AMD3DNOWEXT** (AMD 3DNowExt) +* **MMX** (standard MMX) +* **MMXEXT** (SSE integer functions or AMD MMX ext) +* **SSE** (SSE functions) +* **SSE2** (P4 SSE functions) +* **SSE3** (Prescott SSE3 functions) +* **SSSE3** (Conroe SSSE3 functions) +* **SSE4** (Penryn SSE4.1 functions) +* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions) +* **SSE42** (Nehalem SSE4.2 functions) +* **AVX** (AVX functions) +* **AVX2** (AVX2 functions) +* **FMA3** (Intel FMA 3) +* **FMA4** (Bulldozer FMA4 functions) +* **XOP** (Bulldozer XOP functions) +* **F16C** (Half-precision floating-point conversion) +* **BMI1** (Bit Manipulation Instruction Set 1) +* **BMI2** (Bit Manipulation Instruction Set 2) +* **TBM** (AMD Trailing Bit Manipulation) +* **LZCNT** (LZCNT instruction) +* **POPCNT** (POPCNT instruction) +* **AESNI** (Advanced Encryption Standard New Instructions) +* **CLMUL** (Carry-less Multiplication) +* **HTT** (Hyperthreading (enabled)) +* **HLE** (Hardware Lock Elision) +* **RTM** (Restricted Transactional Memory) +* **RDRAND** (RDRAND instruction is available) +* **RDSEED** (RDSEED instruction is available) +* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions)) +* **SHA** (Intel SHA Extensions) +* **AVX512F** (AVX-512 Foundation) +* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions) +* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions) +* **AVX512PF** (AVX-512 Prefetch Instructions) +* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions) +* **AVX512CD** (AVX-512 Conflict Detection Instructions) 
+* **AVX512BW** (AVX-512 Byte and Word Instructions) +* **AVX512VL** (AVX-512 Vector Length Extensions) +* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions) +* **MPX** (Intel MPX (Memory Protection Extensions)) +* **ERMS** (Enhanced REP MOVSB/STOSB) +* **RDTSCP** (RDTSCP Instruction) +* **CX16** (CMPXCHG16B Instruction) +* **SGX** (Software Guard Extensions, with activation details) + +## Performance +* **RDTSCP()** Returns current cycle count. Can be used for benchmarking. +* **SSE2SLOW** (SSE2 is supported, but usually not faster) +* **SSE3SLOW** (SSE3 is supported, but usually not faster) +* **ATOM** (Atom processor, some SSSE3 instructions are slower) +* **Cache line** (Probable size of a cache line). +* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs. + +## Cpu Vendor/VM +* **Intel** +* **AMD** +* **VIA** +* **Transmeta** +* **NSC** +* **KVM** (Kernel-based Virtual Machine) +* **MSVM** (Microsoft Hyper-V or Windows Virtual PC) +* **VMware** +* **XenHVM** + +# installing + +```go get github.com/klauspost/cpuid``` + +# example + +```Go +package main + +import ( + "fmt" + "github.com/klauspost/cpuid" +) + +func main() { + // Print basic CPU information: + fmt.Println("Name:", cpuid.CPU.BrandName) + fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores) + fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore) + fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores) + fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model) + fmt.Println("Features:", cpuid.CPU.Features) + fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine) + fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes") + fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes") + fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes") + fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes") + + // Test if we have a specific feature: + if cpuid.CPU.SSE() { + fmt.Println("We have Streaming SIMD Extensions") + } +} +``` + +Sample output: +``` +>go run main.go +Name: 
Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz +PhysicalCores: 2 +ThreadsPerCore: 2 +LogicalCores: 4 +Family 6 Model: 42 +Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL +Cacheline bytes: 64 +We have Streaming SIMD Extensions +``` + +# private package + +In the "private" folder you can find an autogenerated version of the library you can include in your own packages. + +For this purpose all exports are removed, and functions and constants are lowercased. + +This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages. + +# license + +This code is published under an MIT license. See LICENSE file for more information. diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go new file mode 100644 index 000000000..9230ca562 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid.go @@ -0,0 +1,1022 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// Package cpuid provides information about the CPU running the current program. +// +// CPU features are detected on startup, and kept for fast access through the life of the application. +// Currently x86 / x64 (AMD64) is supported. +// +// You can access the CPU information by accessing the shared CPU variable of the cpuid library. +// +// Package home: https://github.com/klauspost/cpuid +package cpuid + +import "strings" + +// Vendor is a representation of a CPU vendor. 
+type Vendor int + +const ( + Other Vendor = iota + Intel + AMD + VIA + Transmeta + NSC + KVM // Kernel-based Virtual Machine + MSVM // Microsoft Hyper-V or Windows Virtual PC + VMware + XenHVM +) + +const ( + CMOV = 1 << iota // i686 CMOV + NX // NX (No-Execute) bit + AMD3DNOW // AMD 3DNOW + AMD3DNOWEXT // AMD 3DNowExt + MMX // standard MMX + MMXEXT // SSE integer functions or AMD MMX ext + SSE // SSE functions + SSE2 // P4 SSE functions + SSE3 // Prescott SSE3 functions + SSSE3 // Conroe SSSE3 functions + SSE4 // Penryn SSE4.1 functions + SSE4A // AMD Barcelona microarchitecture SSE4a instructions + SSE42 // Nehalem SSE4.2 functions + AVX // AVX functions + AVX2 // AVX2 functions + FMA3 // Intel FMA 3 + FMA4 // Bulldozer FMA4 functions + XOP // Bulldozer XOP functions + F16C // Half-precision floating-point conversion + BMI1 // Bit Manipulation Instruction Set 1 + BMI2 // Bit Manipulation Instruction Set 2 + TBM // AMD Trailing Bit Manipulation + LZCNT // LZCNT instruction + POPCNT // POPCNT instruction + AESNI // Advanced Encryption Standard New Instructions + CLMUL // Carry-less Multiplication + HTT // Hyperthreading (enabled) + HLE // Hardware Lock Elision + RTM // Restricted Transactional Memory + RDRAND // RDRAND instruction is available + RDSEED // RDSEED instruction is available + ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + SHA // Intel SHA Extensions + AVX512F // AVX-512 Foundation + AVX512DQ // AVX-512 Doubleword and Quadword Instructions + AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF // AVX-512 Prefetch Instructions + AVX512ER // AVX-512 Exponential and Reciprocal Instructions + AVX512CD // AVX-512 Conflict Detection Instructions + AVX512BW // AVX-512 Byte and Word Instructions + AVX512VL // AVX-512 Vector Length Extensions + AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions + MPX // Intel MPX (Memory Protection Extensions) + ERMS // Enhanced REP MOVSB/STOSB + RDTSCP // RDTSCP Instruction + 
CX16 // CMPXCHG16B Instruction + SGX // Software Guard Extensions + + // Performance indicators + SSE2SLOW // SSE2 is supported, but usually not faster + SSE3SLOW // SSE3 is supported, but usually not faster + ATOM // Atom processor, some SSSE3 instructions are slower +) + +var flagNames = map[Flags]string{ + CMOV: "CMOV", // i686 CMOV + NX: "NX", // NX (No-Execute) bit + AMD3DNOW: "AMD3DNOW", // AMD 3DNOW + AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt + MMX: "MMX", // Standard MMX + MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext + SSE: "SSE", // SSE functions + SSE2: "SSE2", // P4 SSE2 functions + SSE3: "SSE3", // Prescott SSE3 functions + SSSE3: "SSSE3", // Conroe SSSE3 functions + SSE4: "SSE4.1", // Penryn SSE4.1 functions + SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions + SSE42: "SSE4.2", // Nehalem SSE4.2 functions + AVX: "AVX", // AVX functions + AVX2: "AVX2", // AVX functions + FMA3: "FMA3", // Intel FMA 3 + FMA4: "FMA4", // Bulldozer FMA4 functions + XOP: "XOP", // Bulldozer XOP functions + F16C: "F16C", // Half-precision floating-point conversion + BMI1: "BMI1", // Bit Manipulation Instruction Set 1 + BMI2: "BMI2", // Bit Manipulation Instruction Set 2 + TBM: "TBM", // AMD Trailing Bit Manipulation + LZCNT: "LZCNT", // LZCNT instruction + POPCNT: "POPCNT", // POPCNT instruction + AESNI: "AESNI", // Advanced Encryption Standard New Instructions + CLMUL: "CLMUL", // Carry-less Multiplication + HTT: "HTT", // Hyperthreading (enabled) + HLE: "HLE", // Hardware Lock Elision + RTM: "RTM", // Restricted Transactional Memory + RDRAND: "RDRAND", // RDRAND instruction is available + RDSEED: "RDSEED", // RDSEED instruction is available + ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + SHA: "SHA", // Intel SHA Extensions + AVX512F: "AVX512F", // AVX-512 Foundation + AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions + AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add 
Instructions + AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions + AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions + AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions + AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions + AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions + AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions + MPX: "MPX", // Intel MPX (Memory Protection Extensions) + ERMS: "ERMS", // Enhanced REP MOVSB/STOSB + RDTSCP: "RDTSCP", // RDTSCP Instruction + CX16: "CX16", // CMPXCHG16B Instruction + SGX: "SGX", // Software Guard Extensions + + // Performance indicators + SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster + SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster + ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower + +} + +// CPUInfo contains information about the detected system CPU. +type CPUInfo struct { + BrandName string // Brand name reported by the CPU + VendorID Vendor // Comparable CPU vendor ID + Features Flags // Features of the CPU + PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. + ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. + LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. + Family int // CPU family number + Model int // CPU model number + CacheLine int // Cache line size in bytes. Will be 0 if undetectable. + Cache struct { + L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected + L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected + L2 int // L2 Cache (per core or shared). Will be -1 if undetected + L3 int // L3 Instruction Cache (per core or shared). 
Will be -1 if undetected + } + SGX SGXSupport + maxFunc uint32 + maxExFunc uint32 +} + +var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) +var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) +var xgetbv func(index uint32) (eax, edx uint32) +var rdtscpAsm func() (eax, ebx, ecx, edx uint32) + +// CPU contains information about the CPU as detected on startup, +// or when Detect last was called. +// +// Use this as the primary entry point to your data; +// this way queries are answered from the values stored by Detect. +var CPU CPUInfo + +func init() { + initCPU() + Detect() +} + +// Detect will re-detect current CPU info. +// This will replace the content of the exported CPU variable. +// +// Unless you expect the CPU to change while you are running your program +// you should not need to call this function. +// If you call this, you must ensure that no other goroutine is accessing the +// exported CPU variable. +func Detect() { + CPU.maxFunc = maxFunctionID() + CPU.maxExFunc = maxExtendedFunction() + CPU.BrandName = brandName() + CPU.CacheLine = cacheLine() + CPU.Family, CPU.Model = familyModel() + CPU.Features = support() + CPU.SGX = sgx(CPU.Features&SGX != 0) + CPU.ThreadsPerCore = threadsPerCore() + CPU.LogicalCores = logicalCores() + CPU.PhysicalCores = physicalCores() + CPU.VendorID = vendorID() + CPU.cacheSize() +} + +// Generated here: http://play.golang.org/p/BxFH2Gdc0G + +// Cmov indicates support of CMOV instructions +func (c CPUInfo) Cmov() bool { + return c.Features&CMOV != 0 +} + +// Amd3dnow indicates support of AMD 3DNOW! instructions +func (c CPUInfo) Amd3dnow() bool { + return c.Features&AMD3DNOW != 0 +} + +// Amd3dnowExt indicates support of AMD 3DNOW!
Extended instructions +func (c CPUInfo) Amd3dnowExt() bool { + return c.Features&AMD3DNOWEXT != 0 +} + +// MMX indicates support of MMX instructions +func (c CPUInfo) MMX() bool { + return c.Features&MMX != 0 +} + +// MMXExt indicates support of MMXEXT instructions +// (SSE integer functions or AMD MMX ext) +func (c CPUInfo) MMXExt() bool { + return c.Features&MMXEXT != 0 +} + +// SSE indicates support of SSE instructions +func (c CPUInfo) SSE() bool { + return c.Features&SSE != 0 +} + +// SSE2 indicates support of SSE 2 instructions +func (c CPUInfo) SSE2() bool { + return c.Features&SSE2 != 0 +} + +// SSE3 indicates support of SSE 3 instructions +func (c CPUInfo) SSE3() bool { + return c.Features&SSE3 != 0 +} + +// SSSE3 indicates support of SSSE 3 instructions +func (c CPUInfo) SSSE3() bool { + return c.Features&SSSE3 != 0 +} + +// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions +func (c CPUInfo) SSE4() bool { + return c.Features&SSE4 != 0 +} + +// SSE42 indicates support of SSE4.2 instructions +func (c CPUInfo) SSE42() bool { + return c.Features&SSE42 != 0 +} + +// AVX indicates support of AVX instructions +// and operating system support of AVX instructions +func (c CPUInfo) AVX() bool { + return c.Features&AVX != 0 +} + +// AVX2 indicates support of AVX2 instructions +func (c CPUInfo) AVX2() bool { + return c.Features&AVX2 != 0 +} + +// FMA3 indicates support of FMA3 instructions +func (c CPUInfo) FMA3() bool { + return c.Features&FMA3 != 0 +} + +// FMA4 indicates support of FMA4 instructions +func (c CPUInfo) FMA4() bool { + return c.Features&FMA4 != 0 +} + +// XOP indicates support of XOP instructions +func (c CPUInfo) XOP() bool { + return c.Features&XOP != 0 +} + +// F16C indicates support of F16C instructions +func (c CPUInfo) F16C() bool { + return c.Features&F16C != 0 +} + +// BMI1 indicates support of BMI1 instructions +func (c CPUInfo) BMI1() bool { + return c.Features&BMI1 != 0 +} + +// BMI2 indicates support of BMI2 instructions 
+func (c CPUInfo) BMI2() bool { + return c.Features&BMI2 != 0 +} + +// TBM indicates support of TBM instructions +// (AMD Trailing Bit Manipulation) +func (c CPUInfo) TBM() bool { + return c.Features&TBM != 0 +} + +// Lzcnt indicates support of LZCNT instruction +func (c CPUInfo) Lzcnt() bool { + return c.Features&LZCNT != 0 +} + +// Popcnt indicates support of POPCNT instruction +func (c CPUInfo) Popcnt() bool { + return c.Features&POPCNT != 0 +} + +// HTT indicates the processor has Hyperthreading enabled +func (c CPUInfo) HTT() bool { + return c.Features&HTT != 0 +} + +// SSE2Slow indicates that SSE2 may be slow on this processor +func (c CPUInfo) SSE2Slow() bool { + return c.Features&SSE2SLOW != 0 +} + +// SSE3Slow indicates that SSE3 may be slow on this processor +func (c CPUInfo) SSE3Slow() bool { + return c.Features&SSE3SLOW != 0 +} + +// AesNi indicates support of AES-NI instructions +// (Advanced Encryption Standard New Instructions) +func (c CPUInfo) AesNi() bool { + return c.Features&AESNI != 0 +} + +// Clmul indicates support of CLMUL instructions +// (Carry-less Multiplication) +func (c CPUInfo) Clmul() bool { + return c.Features&CLMUL != 0 +} + +// NX indicates support of NX (No-Execute) bit +func (c CPUInfo) NX() bool { + return c.Features&NX != 0 +} + +// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions +func (c CPUInfo) SSE4A() bool { + return c.Features&SSE4A != 0 +} + +// HLE indicates support of Hardware Lock Elision +func (c CPUInfo) HLE() bool { + return c.Features&HLE != 0 +} + +// RTM indicates support of Restricted Transactional Memory +func (c CPUInfo) RTM() bool { + return c.Features&RTM != 0 +} + +// Rdrand indicates support of RDRAND instruction is available +func (c CPUInfo) Rdrand() bool { + return c.Features&RDRAND != 0 +} + +// Rdseed indicates support of RDSEED instruction is available +func (c CPUInfo) Rdseed() bool { + return c.Features&RDSEED != 0 +} + +// ADX indicates support of Intel ADX 
(Multi-Precision Add-Carry Instruction Extensions) +func (c CPUInfo) ADX() bool { + return c.Features&ADX != 0 +} + +// SHA indicates support of Intel SHA Extensions +func (c CPUInfo) SHA() bool { + return c.Features&SHA != 0 +} + +// AVX512F indicates support of AVX-512 Foundation +func (c CPUInfo) AVX512F() bool { + return c.Features&AVX512F != 0 +} + +// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions +func (c CPUInfo) AVX512DQ() bool { + return c.Features&AVX512DQ != 0 +} + +// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions +func (c CPUInfo) AVX512IFMA() bool { + return c.Features&AVX512IFMA != 0 +} + +// AVX512PF indicates support of AVX-512 Prefetch Instructions +func (c CPUInfo) AVX512PF() bool { + return c.Features&AVX512PF != 0 +} + +// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions +func (c CPUInfo) AVX512ER() bool { + return c.Features&AVX512ER != 0 +} + +// AVX512CD indicates support of AVX-512 Conflict Detection Instructions +func (c CPUInfo) AVX512CD() bool { + return c.Features&AVX512CD != 0 +} + +// AVX512BW indicates support of AVX-512 Byte and Word Instructions +func (c CPUInfo) AVX512BW() bool { + return c.Features&AVX512BW != 0 +} + +// AVX512VL indicates support of AVX-512 Vector Length Extensions +func (c CPUInfo) AVX512VL() bool { + return c.Features&AVX512VL != 0 +} + +// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions +func (c CPUInfo) AVX512VBMI() bool { + return c.Features&AVX512VBMI != 0 +} + +// MPX indicates support of Intel MPX (Memory Protection Extensions) +func (c CPUInfo) MPX() bool { + return c.Features&MPX != 0 +} + +// ERMS indicates support of Enhanced REP MOVSB/STOSB +func (c CPUInfo) ERMS() bool { + return c.Features&ERMS != 0 +} + +func (c CPUInfo) RDTSCP() bool { + return c.Features&RDTSCP != 0 +} + +func (c CPUInfo) CX16() bool { + return c.Features&CX16 != 0 +} + +// Atom indicates an Atom processor 
+func (c CPUInfo) Atom() bool { + return c.Features&ATOM != 0 +} + +// Intel returns true if vendor is recognized as Intel +func (c CPUInfo) Intel() bool { + return c.VendorID == Intel +} + +// AMD returns true if vendor is recognized as AMD +func (c CPUInfo) AMD() bool { + return c.VendorID == AMD +} + +// Transmeta returns true if vendor is recognized as Transmeta +func (c CPUInfo) Transmeta() bool { + return c.VendorID == Transmeta +} + +// NSC returns true if vendor is recognized as National Semiconductor +func (c CPUInfo) NSC() bool { + return c.VendorID == NSC +} + +// VIA returns true if vendor is recognized as VIA +func (c CPUInfo) VIA() bool { + return c.VendorID == VIA +} + +// RTCounter returns the 64-bit time-stamp counter +// Uses the RDTSCP instruction. The value 0 is returned +// if the CPU does not support the instruction. +func (c CPUInfo) RTCounter() uint64 { + if !c.RDTSCP() { + return 0 + } + a, _, _, d := rdtscpAsm() + return uint64(a) | (uint64(d) << 32) +} + +// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. +// This variable is OS dependent, but on Linux contains information +// about the current cpu/core the code is running on. +// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. +func (c CPUInfo) Ia32TscAux() uint32 { + if !c.RDTSCP() { + return 0 + } + _, _, ecx, _ := rdtscpAsm() + return ecx +} + +// LogicalCPU will return the Logical CPU the code is currently executing on. +// This is likely to change when the OS re-schedules the running thread +// to another CPU. +// If the current core cannot be detected, -1 will be returned. +func (c CPUInfo) LogicalCPU() int { + if c.maxFunc < 1 { + return -1 + } + _, ebx, _, _ := cpuid(1) + return int(ebx >> 24) +} + +// VM Will return true if the cpu id indicates we are in +// a virtual machine. This is only a hint, and will very likely +// have many false negatives. 
+func (c CPUInfo) VM() bool { + switch c.VendorID { + case MSVM, KVM, VMware, XenHVM: + return true + } + return false +} + +// Flags contains detected cpu features and caracteristics +type Flags uint64 + +// String returns a string representation of the detected +// CPU features. +func (f Flags) String() string { + return strings.Join(f.Strings(), ",") +} + +// Strings returns and array of the detected features. +func (f Flags) Strings() []string { + s := support() + r := make([]string, 0, 20) + for i := uint(0); i < 64; i++ { + key := Flags(1 << i) + val := flagNames[key] + if s&key != 0 { + r = append(r, val) + } + } + return r +} + +func maxExtendedFunction() uint32 { + eax, _, _, _ := cpuid(0x80000000) + return eax +} + +func maxFunctionID() uint32 { + a, _, _, _ := cpuid(0) + return a +} + +func brandName() string { + if maxExtendedFunction() >= 0x80000004 { + v := make([]uint32, 0, 48) + for i := uint32(0); i < 3; i++ { + a, b, c, d := cpuid(0x80000002 + i) + v = append(v, a, b, c, d) + } + return strings.Trim(string(valAsString(v...)), " ") + } + return "unknown" +} + +func threadsPerCore() int { + mfi := maxFunctionID() + if mfi < 0x4 || vendorID() != Intel { + return 1 + } + + if mfi < 0xb { + _, b, _, d := cpuid(1) + if (d & (1 << 28)) != 0 { + // v will contain logical core count + v := (b >> 16) & 255 + if v > 1 { + a4, _, _, _ := cpuid(4) + // physical cores + v2 := (a4 >> 26) + 1 + if v2 > 0 { + return int(v) / int(v2) + } + } + } + return 1 + } + _, b, _, _ := cpuidex(0xb, 0) + if b&0xffff == 0 { + return 1 + } + return int(b & 0xffff) +} + +func logicalCores() int { + mfi := maxFunctionID() + switch vendorID() { + case Intel: + // Use this on old Intel processors + if mfi < 0xb { + if mfi < 1 { + return 0 + } + // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) + // that can be assigned to logical processors in a physical package. 
+ // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. + _, ebx, _, _ := cpuid(1) + logical := (ebx >> 16) & 0xff + return int(logical) + } + _, b, _, _ := cpuidex(0xb, 1) + return int(b & 0xffff) + case AMD: + _, b, _, _ := cpuid(1) + return int((b >> 16) & 0xff) + default: + return 0 + } +} + +func familyModel() (int, int) { + if maxFunctionID() < 0x1 { + return 0, 0 + } + eax, _, _, _ := cpuid(1) + family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) + model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) + return int(family), int(model) +} + +func physicalCores() int { + switch vendorID() { + case Intel: + return logicalCores() / threadsPerCore() + case AMD: + if maxExtendedFunction() >= 0x80000008 { + _, _, c, _ := cpuid(0x80000008) + return int(c&0xff) + 1 + } + } + return 0 +} + +// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID +var vendorMapping = map[string]Vendor{ + "AMDisbetter!": AMD, + "AuthenticAMD": AMD, + "CentaurHauls": VIA, + "GenuineIntel": Intel, + "TransmetaCPU": Transmeta, + "GenuineTMx86": Transmeta, + "Geode by NSC": NSC, + "VIA VIA VIA ": VIA, + "KVMKVMKVMKVM": KVM, + "Microsoft Hv": MSVM, + "VMwareVMware": VMware, + "XenVMMXenVMM": XenHVM, +} + +func vendorID() Vendor { + _, b, c, d := cpuid(0) + v := valAsString(b, d, c) + vend, ok := vendorMapping[string(v)] + if !ok { + return Other + } + return vend +} + +func cacheLine() int { + if maxFunctionID() < 0x1 { + return 0 + } + + _, ebx, _, _ := cpuid(1) + cache := (ebx & 0xff00) >> 5 // cflush size + if cache == 0 && maxExtendedFunction() >= 0x80000006 { + _, _, ecx, _ := cpuid(0x80000006) + cache = ecx & 0xff // cacheline size + } + // TODO: Read from Cache and TLB Information + return int(cache) +} + +func (c *CPUInfo) cacheSize() { + c.Cache.L1D = -1 + c.Cache.L1I = -1 + c.Cache.L2 = -1 + c.Cache.L3 = -1 + vendor := vendorID() + switch vendor { + case Intel: + if maxFunctionID() < 4 { + return + 
} + for i := uint32(0); ; i++ { + eax, ebx, ecx, _ := cpuidex(4, i) + cacheType := eax & 15 + if cacheType == 0 { + break + } + cacheLevel := (eax >> 5) & 7 + coherency := int(ebx&0xfff) + 1 + partitions := int((ebx>>12)&0x3ff) + 1 + associativity := int((ebx>>22)&0x3ff) + 1 + sets := int(ecx) + 1 + size := associativity * partitions * coherency * sets + switch cacheLevel { + case 1: + if cacheType == 1 { + // 1 = Data Cache + c.Cache.L1D = size + } else if cacheType == 2 { + // 2 = Instruction Cache + c.Cache.L1I = size + } else { + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + case AMD: + // Untested. + if maxExtendedFunction() < 0x80000005 { + return + } + _, _, ecx, edx := cpuid(0x80000005) + c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) + c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) + + if maxExtendedFunction() < 0x80000006 { + return + } + _, _, ecx, _ = cpuid(0x80000006) + c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) + } + + return +} + +type SGXSupport struct { + Available bool + SGX1Supported bool + SGX2Supported bool + MaxEnclaveSizeNot64 int64 + MaxEnclaveSize64 int64 +} + +func sgx(available bool) (rval SGXSupport) { + rval.Available = available + + if !available { + return + } + + a, _, _, d := cpuidex(0x12, 0) + rval.SGX1Supported = a&0x01 != 0 + rval.SGX2Supported = a&0x02 != 0 + rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 + rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 + + return +} + +func support() Flags { + mfi := maxFunctionID() + vend := vendorID() + if mfi < 0x1 { + return 0 + } + rval := uint64(0) + _, _, c, d := cpuid(1) + if (d & (1 << 15)) != 0 { + rval |= CMOV + } + if (d & (1 << 23)) != 0 { + rval |= MMX + } + if (d & (1 << 25)) != 0 { + rval |= MMXEXT + } + if (d & (1 << 25)) != 0 { + rval |= SSE + } + if (d & (1 << 26)) != 0 { + rval |= SSE2 + } + if (c & 1) != 0 { + rval |= SSE3 + } + if (c 
& 0x00000200) != 0 { + rval |= SSSE3 + } + if (c & 0x00080000) != 0 { + rval |= SSE4 + } + if (c & 0x00100000) != 0 { + rval |= SSE42 + } + if (c & (1 << 25)) != 0 { + rval |= AESNI + } + if (c & (1 << 1)) != 0 { + rval |= CLMUL + } + if c&(1<<23) != 0 { + rval |= POPCNT + } + if c&(1<<30) != 0 { + rval |= RDRAND + } + if c&(1<<29) != 0 { + rval |= F16C + } + if c&(1<<13) != 0 { + rval |= CX16 + } + if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { + if threadsPerCore() > 1 { + rval |= HTT + } + } + + // Check XGETBV, OXSAVE and AVX bits + if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { + // Check for OS support + eax, _ := xgetbv(0) + if (eax & 0x6) == 0x6 { + rval |= AVX + if (c & 0x00001000) != 0 { + rval |= FMA3 + } + } + } + + // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. + if mfi >= 7 { + _, ebx, ecx, _ := cpuidex(7, 0) + if (rval&AVX) != 0 && (ebx&0x00000020) != 0 { + rval |= AVX2 + } + if (ebx & 0x00000008) != 0 { + rval |= BMI1 + if (ebx & 0x00000100) != 0 { + rval |= BMI2 + } + } + if ebx&(1<<2) != 0 { + rval |= SGX + } + if ebx&(1<<4) != 0 { + rval |= HLE + } + if ebx&(1<<9) != 0 { + rval |= ERMS + } + if ebx&(1<<11) != 0 { + rval |= RTM + } + if ebx&(1<<14) != 0 { + rval |= MPX + } + if ebx&(1<<18) != 0 { + rval |= RDSEED + } + if ebx&(1<<19) != 0 { + rval |= ADX + } + if ebx&(1<<29) != 0 { + rval |= SHA + } + + // Only detect AVX-512 features if XGETBV is supported + if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { + // Check for OS support + eax, _ := xgetbv(0) + + // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and + // ZMM16-ZMM31 state are enabled by OS) + /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
+ if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { + if ebx&(1<<16) != 0 { + rval |= AVX512F + } + if ebx&(1<<17) != 0 { + rval |= AVX512DQ + } + if ebx&(1<<21) != 0 { + rval |= AVX512IFMA + } + if ebx&(1<<26) != 0 { + rval |= AVX512PF + } + if ebx&(1<<27) != 0 { + rval |= AVX512ER + } + if ebx&(1<<28) != 0 { + rval |= AVX512CD + } + if ebx&(1<<30) != 0 { + rval |= AVX512BW + } + if ebx&(1<<31) != 0 { + rval |= AVX512VL + } + // ecx + if ecx&(1<<1) != 0 { + rval |= AVX512VBMI + } + } + } + } + + if maxExtendedFunction() >= 0x80000001 { + _, _, c, d := cpuid(0x80000001) + if (c & (1 << 5)) != 0 { + rval |= LZCNT + rval |= POPCNT + } + if (d & (1 << 31)) != 0 { + rval |= AMD3DNOW + } + if (d & (1 << 30)) != 0 { + rval |= AMD3DNOWEXT + } + if (d & (1 << 23)) != 0 { + rval |= MMX + } + if (d & (1 << 22)) != 0 { + rval |= MMXEXT + } + if (c & (1 << 6)) != 0 { + rval |= SSE4A + } + if d&(1<<20) != 0 { + rval |= NX + } + if d&(1<<27) != 0 { + rval |= RDTSCP + } + + /* Allow for selectively disabling SSE2 functions on AMD processors + with SSE2 support but not SSE4a. This includes Athlon64, some + Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster + than SSE2 often enough to utilize this special-case flag. + AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case + so that SSE2 is used unless explicitly disabled by checking + AV_CPU_FLAG_SSE2SLOW. */ + if vendorID() != Intel && + rval&SSE2 != 0 && (c&0x00000040) == 0 { + rval |= SSE2SLOW + } + + /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be + * used unless the OS has AVX support. 
*/ + if (rval & AVX) != 0 { + if (c & 0x00000800) != 0 { + rval |= XOP + } + if (c & 0x00010000) != 0 { + rval |= FMA4 + } + } + + if vendorID() == Intel { + family, model := familyModel() + if family == 6 && (model == 9 || model == 13 || model == 14) { + /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and + * 6/14 (core1 "yonah") theoretically support sse2, but it's + * usually slower than mmx. */ + if (rval & SSE2) != 0 { + rval |= SSE2SLOW + } + if (rval & SSE3) != 0 { + rval |= SSE3SLOW + } + } + /* The Atom processor has SSSE3 support, which is useful in many cases, + * but sometimes the SSSE3 version is slower than the SSE2 equivalent + * on the Atom, but is generally faster on other processors supporting + * SSSE3. This flag allows for selectively disabling certain SSSE3 + * functions on the Atom. */ + if family == 6 && model == 28 { + rval |= ATOM + } + } + } + return Flags(rval) +} + +func valAsString(values ...uint32) []byte { + r := make([]byte, 4*len(values)) + for i, v := range values { + dst := r[i*4:] + dst[0] = byte(v & 0xff) + dst[1] = byte((v >> 8) & 0xff) + dst[2] = byte((v >> 16) & 0xff) + dst[3] = byte((v >> 24) & 0xff) + switch { + case dst[0] == 0: + return r[:i*4] + case dst[1] == 0: + return r[:i*4+1] + case dst[2] == 0: + return r[:i*4+2] + case dst[3] == 0: + return r[:i*4+3] + } + } + return r +} diff --git a/vendor/github.com/klauspost/cpuid/cpuid_386.s b/vendor/github.com/klauspost/cpuid/cpuid_386.s new file mode 100644 index 000000000..9947f7b6f --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid_386.s @@ -0,0 +1,40 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. 

// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Clears CX, loads op into AX, executes CPUID and stores AX, BX, CX, DX
// into the four results.
TEXT ·asmCpuid(SB), 7, $0
	XORL CX, CX
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+4(FP)
	MOVL BX, ebx+8(FP)
	MOVL CX, ecx+12(FP)
	MOVL DX, edx+16(FP)
	RET

// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Same as asmCpuid, but CX is loaded from op2 instead of being cleared.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET

// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw bytes) and stores
// AX and DX into the two results.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+4(FP)
	MOVL DX, edx+8(FP)
	RET

// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and stores AX, BX, CX, DX into the
// four results.
// NOTE(review): BX is not loaded by this routine before it is stored, so the
// ebx result looks unspecified - confirm.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s new file mode 100644 index 000000000..68a7c9d88 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s @@ -0,0 +1,40 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Clears CX, loads op into AX, executes CPUID and stores AX, BX, CX, DX
// into the four results.
TEXT ·asmCpuid(SB), 7, $0
	XORQ CX, CX
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET

// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Same as asmCpuid, but CX is loaded from op2 instead of being cleared.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET

// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw bytes) and stores
// AX and DX into the two results.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+8(FP)
	MOVL DX, edx+12(FP)
	RET

// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and stores AX, BX, CX, DX into the
// four results.
// NOTE(review): BX is not loaded by this routine before it is stored, so the
// ebx result looks unspecified - confirm.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_test.go b/vendor/github.com/klauspost/cpuid/cpuid_test.go new file mode 100644 index 000000000..54d2cbc51 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid_test.go @@ -0,0 +1,727 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

package cpuid

import (
	"fmt"
	"testing"
)

// There is no real way to test a CPU identifier, since results will
// obviously differ on each machine.
+func TestCPUID(t *testing.T) { + n := maxFunctionID() + t.Logf("Max Function:0x%x\n", n) + n = maxExtendedFunction() + t.Logf("Max Extended Function:0x%x\n", n) + t.Log("Name:", CPU.BrandName) + t.Log("PhysicalCores:", CPU.PhysicalCores) + t.Log("ThreadsPerCore:", CPU.ThreadsPerCore) + t.Log("LogicalCores:", CPU.LogicalCores) + t.Log("Family", CPU.Family, "Model:", CPU.Model) + t.Log("Features:", CPU.Features) + t.Log("Cacheline bytes:", CPU.CacheLine) + t.Log("L1 Instruction Cache:", CPU.Cache.L1I, "bytes") + t.Log("L1 Data Cache:", CPU.Cache.L1D, "bytes") + t.Log("L2 Cache:", CPU.Cache.L2, "bytes") + t.Log("L3 Cache:", CPU.Cache.L3, "bytes") + + if CPU.SSE2() { + t.Log("We have SSE2") + } +} + +func TestDumpCPUID(t *testing.T) { + n := int(maxFunctionID()) + for i := 0; i <= n; i++ { + a, b, c, d := cpuidex(uint32(i), 0) + t.Logf("CPUID %08x: %08x-%08x-%08x-%08x", i, a, b, c, d) + ex := uint32(1) + for { + a2, b2, c2, d2 := cpuidex(uint32(i), ex) + if a2 == a && b2 == b && d2 == d || ex > 50 || a2 == 0 { + break + } + t.Logf("CPUID %08x: %08x-%08x-%08x-%08x", i, a2, b2, c2, d2) + a, b, c, d = a2, b2, c2, d2 + ex++ + } + } + n2 := maxExtendedFunction() + for i := uint32(0x80000000); i <= n2; i++ { + a, b, c, d := cpuid(i) + t.Logf("CPUID %08x: %08x-%08x-%08x-%08x", i, a, b, c, d) + } +} + +func Example() { + // Print basic CPU information: + fmt.Println("Name:", CPU.BrandName) + fmt.Println("PhysicalCores:", CPU.PhysicalCores) + fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore) + fmt.Println("LogicalCores:", CPU.LogicalCores) + fmt.Println("Family", CPU.Family, "Model:", CPU.Model) + fmt.Println("Features:", CPU.Features) + fmt.Println("Cacheline bytes:", CPU.CacheLine) + + // Test if we have a specific feature: + if CPU.SSE() { + fmt.Println("We have Streaming SIMD Extensions") + } +} + +func TestBrandNameZero(t *testing.T) { + if len(CPU.BrandName) > 0 { + // Cut out last byte + last := []byte(CPU.BrandName[len(CPU.BrandName)-1:]) + if last[0] == 0 { + 
t.Fatal("last byte was zero") + } else if last[0] == 32 { + t.Fatal("whitespace wasn't trimmed") + } + } +} + +// Generated here: http://play.golang.org/p/mko-0tFt0Q + +// TestCmov tests Cmov() function +func TestCmov(t *testing.T) { + got := CPU.Cmov() + expected := CPU.Features&CMOV == CMOV + if got != expected { + t.Fatalf("Cmov: expected %v, got %v", expected, got) + } + t.Log("CMOV Support:", got) +} + +// TestAmd3dnow tests Amd3dnow() function +func TestAmd3dnow(t *testing.T) { + got := CPU.Amd3dnow() + expected := CPU.Features&AMD3DNOW == AMD3DNOW + if got != expected { + t.Fatalf("Amd3dnow: expected %v, got %v", expected, got) + } + t.Log("AMD3DNOW Support:", got) +} + +// TestAmd3dnowExt tests Amd3dnowExt() function +func TestAmd3dnowExt(t *testing.T) { + got := CPU.Amd3dnowExt() + expected := CPU.Features&AMD3DNOWEXT == AMD3DNOWEXT + if got != expected { + t.Fatalf("Amd3dnowExt: expected %v, got %v", expected, got) + } + t.Log("AMD3DNOWEXT Support:", got) +} + +// TestMMX tests MMX() function +func TestMMX(t *testing.T) { + got := CPU.MMX() + expected := CPU.Features&MMX == MMX + if got != expected { + t.Fatalf("MMX: expected %v, got %v", expected, got) + } + t.Log("MMX Support:", got) +} + +// TestMMXext tests MMXext() function +func TestMMXext(t *testing.T) { + got := CPU.MMXExt() + expected := CPU.Features&MMXEXT == MMXEXT + if got != expected { + t.Fatalf("MMXExt: expected %v, got %v", expected, got) + } + t.Log("MMXEXT Support:", got) +} + +// TestSSE tests SSE() function +func TestSSE(t *testing.T) { + got := CPU.SSE() + expected := CPU.Features&SSE == SSE + if got != expected { + t.Fatalf("SSE: expected %v, got %v", expected, got) + } + t.Log("SSE Support:", got) +} + +// TestSSE2 tests SSE2() function +func TestSSE2(t *testing.T) { + got := CPU.SSE2() + expected := CPU.Features&SSE2 == SSE2 + if got != expected { + t.Fatalf("SSE2: expected %v, got %v", expected, got) + } + t.Log("SSE2 Support:", got) +} + +// TestSSE3 tests SSE3() function +func 
TestSSE3(t *testing.T) { + got := CPU.SSE3() + expected := CPU.Features&SSE3 == SSE3 + if got != expected { + t.Fatalf("SSE3: expected %v, got %v", expected, got) + } + t.Log("SSE3 Support:", got) +} + +// TestSSSE3 tests SSSE3() function +func TestSSSE3(t *testing.T) { + got := CPU.SSSE3() + expected := CPU.Features&SSSE3 == SSSE3 + if got != expected { + t.Fatalf("SSSE3: expected %v, got %v", expected, got) + } + t.Log("SSSE3 Support:", got) +} + +// TestSSE4 tests SSE4() function +func TestSSE4(t *testing.T) { + got := CPU.SSE4() + expected := CPU.Features&SSE4 == SSE4 + if got != expected { + t.Fatalf("SSE4: expected %v, got %v", expected, got) + } + t.Log("SSE4 Support:", got) +} + +// TestSSE42 tests SSE42() function +func TestSSE42(t *testing.T) { + got := CPU.SSE42() + expected := CPU.Features&SSE42 == SSE42 + if got != expected { + t.Fatalf("SSE42: expected %v, got %v", expected, got) + } + t.Log("SSE42 Support:", got) +} + +// TestAVX tests AVX() function +func TestAVX(t *testing.T) { + got := CPU.AVX() + expected := CPU.Features&AVX == AVX + if got != expected { + t.Fatalf("AVX: expected %v, got %v", expected, got) + } + t.Log("AVX Support:", got) +} + +// TestAVX2 tests AVX2() function +func TestAVX2(t *testing.T) { + got := CPU.AVX2() + expected := CPU.Features&AVX2 == AVX2 + if got != expected { + t.Fatalf("AVX2: expected %v, got %v", expected, got) + } + t.Log("AVX2 Support:", got) +} + +// TestFMA3 tests FMA3() function +func TestFMA3(t *testing.T) { + got := CPU.FMA3() + expected := CPU.Features&FMA3 == FMA3 + if got != expected { + t.Fatalf("FMA3: expected %v, got %v", expected, got) + } + t.Log("FMA3 Support:", got) +} + +// TestFMA4 tests FMA4() function +func TestFMA4(t *testing.T) { + got := CPU.FMA4() + expected := CPU.Features&FMA4 == FMA4 + if got != expected { + t.Fatalf("FMA4: expected %v, got %v", expected, got) + } + t.Log("FMA4 Support:", got) +} + +// TestXOP tests XOP() function +func TestXOP(t *testing.T) { + got := CPU.XOP() + 
expected := CPU.Features&XOP == XOP + if got != expected { + t.Fatalf("XOP: expected %v, got %v", expected, got) + } + t.Log("XOP Support:", got) +} + +// TestF16C tests F16C() function +func TestF16C(t *testing.T) { + got := CPU.F16C() + expected := CPU.Features&F16C == F16C + if got != expected { + t.Fatalf("F16C: expected %v, got %v", expected, got) + } + t.Log("F16C Support:", got) +} + +// TestCX16 tests CX16() function +func TestCX16(t *testing.T) { + got := CPU.CX16() + expected := CPU.Features&CX16 == CX16 + if got != expected { + t.Fatalf("CX16: expected %v, got %v", expected, got) + } + t.Log("CX16 Support:", got) +} + +// TestSGX tests SGX() function +func TestSGX(t *testing.T) { + got := CPU.SGX.Available + expected := CPU.Features&SGX == SGX + if got != expected { + t.Fatalf("SGX: expected %v, got %v", expected, got) + } + t.Log("SGX Support:", got) +} + +// TestBMI1 tests BMI1() function +func TestBMI1(t *testing.T) { + got := CPU.BMI1() + expected := CPU.Features&BMI1 == BMI1 + if got != expected { + t.Fatalf("BMI1: expected %v, got %v", expected, got) + } + t.Log("BMI1 Support:", got) +} + +// TestBMI2 tests BMI2() function +func TestBMI2(t *testing.T) { + got := CPU.BMI2() + expected := CPU.Features&BMI2 == BMI2 + if got != expected { + t.Fatalf("BMI2: expected %v, got %v", expected, got) + } + t.Log("BMI2 Support:", got) +} + +// TestTBM tests TBM() function +func TestTBM(t *testing.T) { + got := CPU.TBM() + expected := CPU.Features&TBM == TBM + if got != expected { + t.Fatalf("TBM: expected %v, got %v", expected, got) + } + t.Log("TBM Support:", got) +} + +// TestLzcnt tests Lzcnt() function +func TestLzcnt(t *testing.T) { + got := CPU.Lzcnt() + expected := CPU.Features&LZCNT == LZCNT + if got != expected { + t.Fatalf("Lzcnt: expected %v, got %v", expected, got) + } + t.Log("LZCNT Support:", got) +} + +// TestLzcnt tests Lzcnt() function +func TestPopcnt(t *testing.T) { + got := CPU.Popcnt() + expected := CPU.Features&POPCNT == POPCNT + if got != 
expected { + t.Fatalf("Popcnt: expected %v, got %v", expected, got) + } + t.Log("POPCNT Support:", got) +} + +// TestAesNi tests AesNi() function +func TestAesNi(t *testing.T) { + got := CPU.AesNi() + expected := CPU.Features&AESNI == AESNI + if got != expected { + t.Fatalf("AesNi: expected %v, got %v", expected, got) + } + t.Log("AESNI Support:", got) +} + +// TestHTT tests HTT() function +func TestHTT(t *testing.T) { + got := CPU.HTT() + expected := CPU.Features&HTT == HTT + if got != expected { + t.Fatalf("HTT: expected %v, got %v", expected, got) + } + t.Log("HTT Support:", got) +} + +// TestClmul tests Clmul() function +func TestClmul(t *testing.T) { + got := CPU.Clmul() + expected := CPU.Features&CLMUL == CLMUL + if got != expected { + t.Fatalf("Clmul: expected %v, got %v", expected, got) + } + t.Log("CLMUL Support:", got) +} + +// TestSSE2Slow tests SSE2Slow() function +func TestSSE2Slow(t *testing.T) { + got := CPU.SSE2Slow() + expected := CPU.Features&SSE2SLOW == SSE2SLOW + if got != expected { + t.Fatalf("SSE2Slow: expected %v, got %v", expected, got) + } + t.Log("SSE2SLOW Support:", got) +} + +// TestSSE3Slow tests SSE3slow() function +func TestSSE3Slow(t *testing.T) { + got := CPU.SSE3Slow() + expected := CPU.Features&SSE3SLOW == SSE3SLOW + if got != expected { + t.Fatalf("SSE3slow: expected %v, got %v", expected, got) + } + t.Log("SSE3SLOW Support:", got) +} + +// TestAtom tests Atom() function +func TestAtom(t *testing.T) { + got := CPU.Atom() + expected := CPU.Features&ATOM == ATOM + if got != expected { + t.Fatalf("Atom: expected %v, got %v", expected, got) + } + t.Log("ATOM Support:", got) +} + +// TestNX tests NX() function (NX (No-Execute) bit) +func TestNX(t *testing.T) { + got := CPU.NX() + expected := CPU.Features&NX == NX + if got != expected { + t.Fatalf("NX: expected %v, got %v", expected, got) + } + t.Log("NX Support:", got) +} + +// TestSSE4A tests SSE4A() function (AMD Barcelona microarchitecture SSE4a instructions) +func TestSSE4A(t 
*testing.T) { + got := CPU.SSE4A() + expected := CPU.Features&SSE4A == SSE4A + if got != expected { + t.Fatalf("SSE4A: expected %v, got %v", expected, got) + } + t.Log("SSE4A Support:", got) +} + +// TestHLE tests HLE() function (Hardware Lock Elision) +func TestHLE(t *testing.T) { + got := CPU.HLE() + expected := CPU.Features&HLE == HLE + if got != expected { + t.Fatalf("HLE: expected %v, got %v", expected, got) + } + t.Log("HLE Support:", got) +} + +// TestRTM tests RTM() function (Restricted Transactional Memory) +func TestRTM(t *testing.T) { + got := CPU.RTM() + expected := CPU.Features&RTM == RTM + if got != expected { + t.Fatalf("RTM: expected %v, got %v", expected, got) + } + t.Log("RTM Support:", got) +} + +// TestRdrand tests RDRAND() function (RDRAND instruction is available) +func TestRdrand(t *testing.T) { + got := CPU.Rdrand() + expected := CPU.Features&RDRAND == RDRAND + if got != expected { + t.Fatalf("Rdrand: expected %v, got %v", expected, got) + } + t.Log("Rdrand Support:", got) +} + +// TestRdseed tests RDSEED() function (RDSEED instruction is available) +func TestRdseed(t *testing.T) { + got := CPU.Rdseed() + expected := CPU.Features&RDSEED == RDSEED + if got != expected { + t.Fatalf("Rdseed: expected %v, got %v", expected, got) + } + t.Log("Rdseed Support:", got) +} + +// TestADX tests ADX() function (Intel ADX (Multi-Precision Add-Carry Instruction Extensions)) +func TestADX(t *testing.T) { + got := CPU.ADX() + expected := CPU.Features&ADX == ADX + if got != expected { + t.Fatalf("ADX: expected %v, got %v", expected, got) + } + t.Log("ADX Support:", got) +} + +// TestSHA tests SHA() function (Intel SHA Extensions) +func TestSHA(t *testing.T) { + got := CPU.SHA() + expected := CPU.Features&SHA == SHA + if got != expected { + t.Fatalf("SHA: expected %v, got %v", expected, got) + } + t.Log("SHA Support:", got) +} + +// TestAVX512F tests AVX512F() function (AVX-512 Foundation) +func TestAVX512F(t *testing.T) { + got := CPU.AVX512F() + expected := 
CPU.Features&AVX512F == AVX512F + if got != expected { + t.Fatalf("AVX512F: expected %v, got %v", expected, got) + } + t.Log("AVX512F Support:", got) +} + +// TestAVX512DQ tests AVX512DQ() function (AVX-512 Doubleword and Quadword Instructions) +func TestAVX512DQ(t *testing.T) { + got := CPU.AVX512DQ() + expected := CPU.Features&AVX512DQ == AVX512DQ + if got != expected { + t.Fatalf("AVX512DQ: expected %v, got %v", expected, got) + } + t.Log("AVX512DQ Support:", got) +} + +// TestAVX512IFMA tests AVX512IFMA() function (AVX-512 Integer Fused Multiply-Add Instructions) +func TestAVX512IFMA(t *testing.T) { + got := CPU.AVX512IFMA() + expected := CPU.Features&AVX512IFMA == AVX512IFMA + if got != expected { + t.Fatalf("AVX512IFMA: expected %v, got %v", expected, got) + } + t.Log("AVX512IFMA Support:", got) +} + +// TestAVX512PF tests AVX512PF() function (AVX-512 Prefetch Instructions) +func TestAVX512PF(t *testing.T) { + got := CPU.AVX512PF() + expected := CPU.Features&AVX512PF == AVX512PF + if got != expected { + t.Fatalf("AVX512PF: expected %v, got %v", expected, got) + } + t.Log("AVX512PF Support:", got) +} + +// TestAVX512ER tests AVX512ER() function (AVX-512 Exponential and Reciprocal Instructions) +func TestAVX512ER(t *testing.T) { + got := CPU.AVX512ER() + expected := CPU.Features&AVX512ER == AVX512ER + if got != expected { + t.Fatalf("AVX512ER: expected %v, got %v", expected, got) + } + t.Log("AVX512ER Support:", got) +} + +// TestAVX512CD tests AVX512CD() function (AVX-512 Conflict Detection Instructions) +func TestAVX512CD(t *testing.T) { + got := CPU.AVX512CD() + expected := CPU.Features&AVX512CD == AVX512CD + if got != expected { + t.Fatalf("AVX512CD: expected %v, got %v", expected, got) + } + t.Log("AVX512CD Support:", got) +} + +// TestAVX512BW tests AVX512BW() function (AVX-512 Byte and Word Instructions) +func TestAVX512BW(t *testing.T) { + got := CPU.AVX512BW() + expected := CPU.Features&AVX512BW == AVX512BW + if got != expected { + t.Fatalf("AVX512BW: 
expected %v, got %v", expected, got) + } + t.Log("AVX512BW Support:", got) +} + +// TestAVX512VL tests AVX512VL() function (AVX-512 Vector Length Extensions) +func TestAVX512VL(t *testing.T) { + got := CPU.AVX512VL() + expected := CPU.Features&AVX512VL == AVX512VL + if got != expected { + t.Fatalf("AVX512VL: expected %v, got %v", expected, got) + } + t.Log("AVX512VL Support:", got) +} + +// TestAVX512VL tests AVX512VBMI() function (AVX-512 Vector Bit Manipulation Instructions) +func TestAVX512VBMI(t *testing.T) { + got := CPU.AVX512VBMI() + expected := CPU.Features&AVX512VBMI == AVX512VBMI + if got != expected { + t.Fatalf("AVX512VBMI: expected %v, got %v", expected, got) + } + t.Log("AVX512VBMI Support:", got) +} + +// TestMPX tests MPX() function (Intel MPX (Memory Protection Extensions)) +func TestMPX(t *testing.T) { + got := CPU.MPX() + expected := CPU.Features&MPX == MPX + if got != expected { + t.Fatalf("MPX: expected %v, got %v", expected, got) + } + t.Log("MPX Support:", got) +} + +// TestERMS tests ERMS() function (Enhanced REP MOVSB/STOSB) +func TestERMS(t *testing.T) { + got := CPU.ERMS() + expected := CPU.Features&ERMS == ERMS + if got != expected { + t.Fatalf("ERMS: expected %v, got %v", expected, got) + } + t.Log("ERMS Support:", got) +} + +// TestVendor writes the detected vendor. 
Will be 0 if unknown +func TestVendor(t *testing.T) { + t.Log("Vendor ID:", CPU.VendorID) +} + +// Intel returns true if vendor is recognized as Intel +func TestIntel(t *testing.T) { + got := CPU.Intel() + expected := CPU.VendorID == Intel + if got != expected { + t.Fatalf("TestIntel: expected %v, got %v", expected, got) + } + t.Log("TestIntel:", got) +} + +// AMD returns true if vendor is recognized as AMD +func TestAMD(t *testing.T) { + got := CPU.AMD() + expected := CPU.VendorID == AMD + if got != expected { + t.Fatalf("TestAMD: expected %v, got %v", expected, got) + } + t.Log("TestAMD:", got) +} + +// Transmeta returns true if vendor is recognized as Transmeta +func TestTransmeta(t *testing.T) { + got := CPU.Transmeta() + expected := CPU.VendorID == Transmeta + if got != expected { + t.Fatalf("TestTransmeta: expected %v, got %v", expected, got) + } + t.Log("TestTransmeta:", got) +} + +// NSC returns true if vendor is recognized as National Semiconductor +func TestNSC(t *testing.T) { + got := CPU.NSC() + expected := CPU.VendorID == NSC + if got != expected { + t.Fatalf("TestNSC: expected %v, got %v", expected, got) + } + t.Log("TestNSC:", got) +} + +// VIA returns true if vendor is recognized as VIA +func TestVIA(t *testing.T) { + got := CPU.VIA() + expected := CPU.VendorID == VIA + if got != expected { + t.Fatalf("TestVIA: expected %v, got %v", expected, got) + } + t.Log("TestVIA:", got) +} + +// Test VM function +func TestVM(t *testing.T) { + t.Log("Vendor ID:", CPU.VM()) +} + +// Test RTCounter function +func TestRtCounter(t *testing.T) { + a := CPU.RTCounter() + b := CPU.RTCounter() + t.Log("CPU Counter:", a, b, b-a) +} + +// Prints the value of Ia32TscAux() +func TestIa32TscAux(t *testing.T) { + ecx := CPU.Ia32TscAux() + t.Logf("Ia32TscAux:0x%x\n", ecx) + if ecx != 0 { + chip := (ecx & 0xFFF000) >> 12 + core := ecx & 0xFFF + t.Log("Likely chip, core:", chip, core) + } +} + +func TestThreadsPerCoreNZ(t *testing.T) { + if CPU.ThreadsPerCore == 0 { + 
t.Fatal("threads per core is zero") + } +} + +// Prints the value of LogicalCPU() +func TestLogicalCPU(t *testing.T) { + t.Log("Currently executing on cpu:", CPU.LogicalCPU()) +} + +func TestMaxFunction(t *testing.T) { + expect := maxFunctionID() + if CPU.maxFunc != expect { + t.Fatal("Max function does not match, expected", expect, "but got", CPU.maxFunc) + } + expect = maxExtendedFunction() + if CPU.maxExFunc != expect { + t.Fatal("Max Extended function does not match, expected", expect, "but got", CPU.maxFunc) + } +} + +// This example will calculate the chip/core number on Linux +// Linux encodes numa id (<<12) and core id (8bit) into TSC_AUX. +func ExampleCPUInfo_Ia32TscAux(t *testing.T) { + ecx := CPU.Ia32TscAux() + if ecx == 0 { + fmt.Println("Unknown CPU ID") + return + } + chip := (ecx & 0xFFF000) >> 12 + core := ecx & 0xFFF + fmt.Println("Chip, Core:", chip, core) +} + +/* +func TestPhysical(t *testing.T) { + var test16 = "CPUID 00000000: 0000000d-756e6547-6c65746e-49656e69 \nCPUID 00000001: 000206d7-03200800-1fbee3ff-bfebfbff \nCPUID 00000002: 76035a01-00f0b2ff-00000000-00ca0000 \nCPUID 00000003: 00000000-00000000-00000000-00000000 \nCPUID 00000004: 3c004121-01c0003f-0000003f-00000000 \nCPUID 00000004: 3c004122-01c0003f-0000003f-00000000 \nCPUID 00000004: 3c004143-01c0003f-000001ff-00000000 \nCPUID 00000004: 3c07c163-04c0003f-00003fff-00000006 \nCPUID 00000005: 00000040-00000040-00000003-00021120 \nCPUID 00000006: 00000075-00000002-00000009-00000000 \nCPUID 00000007: 00000000-00000000-00000000-00000000 \nCPUID 00000008: 00000000-00000000-00000000-00000000 \nCPUID 00000009: 00000001-00000000-00000000-00000000 \nCPUID 0000000a: 07300403-00000000-00000000-00000603 \nCPUID 0000000b: 00000000-00000000-00000003-00000003 \nCPUID 0000000b: 00000005-00000010-00000201-00000003 \nCPUID 0000000c: 00000000-00000000-00000000-00000000 \nCPUID 0000000d: 00000007-00000340-00000340-00000000 \nCPUID 0000000d: 00000001-00000000-00000000-00000000 \nCPUID 0000000d: 
00000100-00000240-00000000-00000000 \nCPUID 80000000: 80000008-00000000-00000000-00000000 \nCPUID 80000001: 00000000-00000000-00000001-2c100800 \nCPUID 80000002: 20202020-49202020-6c65746e-20295228 \nCPUID 80000003: 6e6f6558-20295228-20555043-322d3545 \nCPUID 80000004: 20303636-20402030-30322e32-007a4847 \nCPUID 80000005: 00000000-00000000-00000000-00000000 \nCPUID 80000006: 00000000-00000000-01006040-00000000 \nCPUID 80000007: 00000000-00000000-00000000-00000100 \nCPUID 80000008: 0000302e-00000000-00000000-00000000" + restore := mockCPU([]byte(test16)) + Detect() + t.Log("Name:", CPU.BrandName) + n := maxFunctionID() + t.Logf("Max Function:0x%x\n", n) + n = maxExtendedFunction() + t.Logf("Max Extended Function:0x%x\n", n) + t.Log("PhysicalCores:", CPU.PhysicalCores) + t.Log("ThreadsPerCore:", CPU.ThreadsPerCore) + t.Log("LogicalCores:", CPU.LogicalCores) + t.Log("Family", CPU.Family, "Model:", CPU.Model) + t.Log("Features:", CPU.Features) + t.Log("Cacheline bytes:", CPU.CacheLine) + t.Log("L1 Instruction Cache:", CPU.Cache.L1I, "bytes") + t.Log("L1 Data Cache:", CPU.Cache.L1D, "bytes") + t.Log("L2 Cache:", CPU.Cache.L2, "bytes") + t.Log("L3 Cache:", CPU.Cache.L3, "bytes") + if CPU.LogicalCores > 0 && CPU.PhysicalCores > 0 { + if CPU.LogicalCores != CPU.PhysicalCores*CPU.ThreadsPerCore { + t.Fatalf("Core count mismatch, LogicalCores (%d) != PhysicalCores (%d) * CPU.ThreadsPerCore (%d)", + CPU.LogicalCores, CPU.PhysicalCores, CPU.ThreadsPerCore) + } + } + + if CPU.ThreadsPerCore > 1 && !CPU.HTT() { + t.Fatalf("Hyperthreading not detected") + } + if CPU.ThreadsPerCore == 1 && CPU.HTT() { + t.Fatalf("Hyperthreading detected, but only 1 Thread per core") + } + restore() + Detect() + TestCPUID(t) +} +*/ diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/detect_intel.go new file mode 100644 index 000000000..d5475ebfa --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/detect_intel.go @@ -0,0 +1,17 @@ +// Copyright (c) 
2015 Klaus Post, released under MIT License. See LICENSE file. + +// +build 386 amd64 + +package cpuid + +func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32) +func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +func asmXgetbv(index uint32) (eax, edx uint32) +func asmRdtscpAsm() (eax, ebx, ecx, edx uint32) + +func initCPU() { + cpuid = asmCpuid + cpuidex = asmCpuidex + xgetbv = asmXgetbv + rdtscpAsm = asmRdtscpAsm +} diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/detect_ref.go new file mode 100644 index 000000000..6f9231b13 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/detect_ref.go @@ -0,0 +1,23 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// +build !amd64,!386 + +package cpuid + +func initCPU() { + cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 + } + + cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 + } + + xgetbv = func(index uint32) (eax, edx uint32) { + return 0, 0 + } + + rdtscpAsm = func() (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 + } +} diff --git a/vendor/github.com/klauspost/cpuid/generate.go b/vendor/github.com/klauspost/cpuid/generate.go new file mode 100644 index 000000000..c060b8165 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/generate.go @@ -0,0 +1,3 @@ +package cpuid + +//go:generate go run private-gen.go diff --git a/vendor/github.com/klauspost/cpuid/mockcpu_test.go b/vendor/github.com/klauspost/cpuid/mockcpu_test.go new file mode 100644 index 000000000..f15173f73 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/mockcpu_test.go @@ -0,0 +1,209 @@ +package cpuid + +import ( + "archive/zip" + "fmt" + "io/ioutil" + "sort" + "strings" + "testing" +) + +type fakecpuid map[uint32][][]uint32 + +type idfuncs struct { + cpuid func(op uint32) (eax, ebx, ecx, edx uint32) + cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) + xgetbv func(index uint32) (eax, edx 
uint32) +} + +func (f fakecpuid) String() string { + var out = make([]string, 0, len(f)) + for key, val := range f { + for _, v := range val { + out = append(out, fmt.Sprintf("CPUID %08x: [%08x, %08x, %08x, %08x]", key, v[0], v[1], v[2], v[3])) + } + } + sorter := sort.StringSlice(out) + sort.Sort(&sorter) + return strings.Join(sorter, "\n") +} + +func mockCPU(def []byte) func() { + lines := strings.Split(string(def), "\n") + anyfound := false + fakeID := make(fakecpuid) + for _, line := range lines { + line = strings.Trim(line, "\r\t ") + if !strings.HasPrefix(line, "CPUID") { + continue + } + // Only collect for first cpu + if strings.HasPrefix(line, "CPUID 00000000") { + if anyfound { + break + } + } + if !strings.Contains(line, "-") { + //continue + } + items := strings.Split(line, ":") + if len(items) < 2 { + if len(line) == 51 || len(line) == 50 { + items = []string{line[0:14], line[15:]} + } else { + items = strings.Split(line, "\t") + if len(items) != 2 { + //fmt.Println("not found:", line, "len:", len(line)) + continue + } + } + } + items = items[0:2] + vals := strings.Trim(items[1], "\r\n ") + + var idV uint32 + n, err := fmt.Sscanf(items[0], "CPUID %x", &idV) + if err != nil || n != 1 { + continue + } + existing, ok := fakeID[idV] + if !ok { + existing = make([][]uint32, 0) + } + + values := make([]uint32, 4) + n, err = fmt.Sscanf(vals, "%x-%x-%x-%x", &values[0], &values[1], &values[2], &values[3]) + if n != 4 || err != nil { + n, err = fmt.Sscanf(vals, "%x %x %x %x", &values[0], &values[1], &values[2], &values[3]) + if n != 4 || err != nil { + //fmt.Println("scanned", vals, "got", n, "Err:", err) + continue + } + } + + existing = append(existing, values) + fakeID[idV] = existing + anyfound = true + } + + restorer := func(f idfuncs) func() { + return func() { + cpuid = f.cpuid + cpuidex = f.cpuidex + xgetbv = f.xgetbv + } + }(idfuncs{cpuid: cpuid, cpuidex: cpuidex, xgetbv: xgetbv}) + + cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) { + if op == 
0x80000000 || op == 0 { + var ok bool + _, ok = fakeID[op] + if !ok { + return 0, 0, 0, 0 + } + } + first, ok := fakeID[op] + if !ok { + if op > maxFunctionID() { + panic(fmt.Sprintf("Base not found: %v, request:%#v\n", fakeID, op)) + } else { + // we have some entries missing + return 0, 0, 0, 0 + } + } + theid := first[0] + return theid[0], theid[1], theid[2], theid[3] + } + cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) { + if op == 0x80000000 { + var ok bool + _, ok = fakeID[op] + if !ok { + return 0, 0, 0, 0 + } + } + first, ok := fakeID[op] + if !ok { + if op > maxExtendedFunction() { + panic(fmt.Sprintf("Extended not found Info: %v, request:%#v, %#v\n", fakeID, op, op2)) + } else { + // we have some entries missing + return 0, 0, 0, 0 + } + } + if int(op2) >= len(first) { + //fmt.Printf("Extended not found Info: %v, request:%#v, %#v\n", fakeID, op, op2) + return 0, 0, 0, 0 + } + theid := first[op2] + return theid[0], theid[1], theid[2], theid[3] + } + xgetbv = func(index uint32) (eax, edx uint32) { + first, ok := fakeID[1] + if !ok { + panic(fmt.Sprintf("XGETBV not supported %v", fakeID)) + } + second := first[0] + // ECX bit 26 must be set + if (second[2] & 1 << 26) == 0 { + panic(fmt.Sprintf("XGETBV not supported %v", fakeID)) + } + // We don't have any data to return, unfortunately + return 0, 0 + } + return restorer +} + +func TestMocks(t *testing.T) { + zr, err := zip.OpenReader("testdata/cpuid_data.zip") + if err != nil { + t.Skip("No testdata:", err) + } + defer zr.Close() + for _, f := range zr.File { + rc, err := f.Open() + if err != nil { + t.Fatal(err) + } + content, err := ioutil.ReadAll(rc) + if err != nil { + t.Fatal(err) + } + rc.Close() + t.Log("Opening", f.FileInfo().Name()) + restore := mockCPU(content) + Detect() + t.Log("Name:", CPU.BrandName) + n := maxFunctionID() + t.Logf("Max Function:0x%x\n", n) + n = maxExtendedFunction() + t.Logf("Max Extended Function:0x%x\n", n) + t.Log("PhysicalCores:", CPU.PhysicalCores) + 
t.Log("ThreadsPerCore:", CPU.ThreadsPerCore) + t.Log("LogicalCores:", CPU.LogicalCores) + t.Log("Family", CPU.Family, "Model:", CPU.Model) + t.Log("Features:", CPU.Features) + t.Log("Cacheline bytes:", CPU.CacheLine) + t.Log("L1 Instruction Cache:", CPU.Cache.L1I, "bytes") + t.Log("L1 Data Cache:", CPU.Cache.L1D, "bytes") + t.Log("L2 Cache:", CPU.Cache.L2, "bytes") + t.Log("L3 Cache:", CPU.Cache.L3, "bytes") + if CPU.LogicalCores > 0 && CPU.PhysicalCores > 0 { + if CPU.LogicalCores != CPU.PhysicalCores*CPU.ThreadsPerCore { + t.Fatalf("Core count mismatch, LogicalCores (%d) != PhysicalCores (%d) * CPU.ThreadsPerCore (%d)", + CPU.LogicalCores, CPU.PhysicalCores, CPU.ThreadsPerCore) + } + } + + if CPU.ThreadsPerCore > 1 && !CPU.HTT() { + t.Fatalf("Hyperthreading not detected") + } + if CPU.ThreadsPerCore == 1 && CPU.HTT() { + t.Fatalf("Hyperthreading detected, but only 1 Thread per core") + } + restore() + } + Detect() + +} diff --git a/vendor/github.com/klauspost/cpuid/private-gen.go b/vendor/github.com/klauspost/cpuid/private-gen.go new file mode 100644 index 000000000..437333d29 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/private-gen.go @@ -0,0 +1,476 @@ +// +build ignore + +package main + +import ( + "bytes" + "fmt" + "go/ast" + "go/parser" + "go/printer" + "go/token" + "io" + "io/ioutil" + "log" + "os" + "reflect" + "strings" + "unicode" + "unicode/utf8" +) + +var inFiles = []string{"cpuid.go", "cpuid_test.go"} +var copyFiles = []string{"cpuid_amd64.s", "cpuid_386.s", "detect_ref.go", "detect_intel.go"} +var fileSet = token.NewFileSet() +var reWrites = []rewrite{ + initRewrite("CPUInfo -> cpuInfo"), + initRewrite("Vendor -> vendor"), + initRewrite("Flags -> flags"), + initRewrite("Detect -> detect"), + initRewrite("CPU -> cpu"), +} +var excludeNames = map[string]bool{"string": true, "join": true, "trim": true, + // cpuid_test.go + "t": true, "println": true, "logf": true, "log": true, "fatalf": true, "fatal": true, +} + +var excludePrefixes = 
[]string{"test", "benchmark"} + +func main() { + Package := "private" + parserMode := parser.ParseComments + exported := make(map[string]rewrite) + for _, file := range inFiles { + in, err := os.Open(file) + if err != nil { + log.Fatalf("opening input", err) + } + + src, err := ioutil.ReadAll(in) + if err != nil { + log.Fatalf("reading input", err) + } + + astfile, err := parser.ParseFile(fileSet, file, src, parserMode) + if err != nil { + log.Fatalf("parsing input", err) + } + + for _, rw := range reWrites { + astfile = rw(astfile) + } + + // Inspect the AST and print all identifiers and literals. + var startDecl token.Pos + var endDecl token.Pos + ast.Inspect(astfile, func(n ast.Node) bool { + var s string + switch x := n.(type) { + case *ast.Ident: + if x.IsExported() { + t := strings.ToLower(x.Name) + for _, pre := range excludePrefixes { + if strings.HasPrefix(t, pre) { + return true + } + } + if excludeNames[t] != true { + //if x.Pos() > startDecl && x.Pos() < endDecl { + exported[x.Name] = initRewrite(x.Name + " -> " + t) + } + } + + case *ast.GenDecl: + if x.Tok == token.CONST && x.Lparen > 0 { + startDecl = x.Lparen + endDecl = x.Rparen + // fmt.Printf("Decl:%s -> %s\n", fileSet.Position(startDecl), fileSet.Position(endDecl)) + } + } + if s != "" { + fmt.Printf("%s:\t%s\n", fileSet.Position(n.Pos()), s) + } + return true + }) + + for _, rw := range exported { + astfile = rw(astfile) + } + + var buf bytes.Buffer + + printer.Fprint(&buf, fileSet, astfile) + + // Remove package documentation and insert information + s := buf.String() + ind := strings.Index(buf.String(), "\npackage cpuid") + s = s[ind:] + s = "// Generated, DO NOT EDIT,\n" + + "// but copy it to your own project and rename the package.\n" + + "// See more at http://github.com/klauspost/cpuid\n" + + s + + outputName := Package + string(os.PathSeparator) + file + + err = ioutil.WriteFile(outputName, []byte(s), 0644) + if err != nil { + log.Fatalf("writing output: %s", err) + } + 
log.Println("Generated", outputName) + } + + for _, file := range copyFiles { + dst := "" + if strings.HasPrefix(file, "cpuid") { + dst = Package + string(os.PathSeparator) + file + } else { + dst = Package + string(os.PathSeparator) + "cpuid_" + file + } + err := copyFile(file, dst) + if err != nil { + log.Fatalf("copying file: %s", err) + } + log.Println("Copied", dst) + } +} + +// CopyFile copies a file from src to dst. If src and dst files exist, and are +// the same, then return success. Copy the file contents from src to dst. +func copyFile(src, dst string) (err error) { + sfi, err := os.Stat(src) + if err != nil { + return + } + if !sfi.Mode().IsRegular() { + // cannot copy non-regular files (e.g., directories, + // symlinks, devices, etc.) + return fmt.Errorf("CopyFile: non-regular source file %s (%q)", sfi.Name(), sfi.Mode().String()) + } + dfi, err := os.Stat(dst) + if err != nil { + if !os.IsNotExist(err) { + return + } + } else { + if !(dfi.Mode().IsRegular()) { + return fmt.Errorf("CopyFile: non-regular destination file %s (%q)", dfi.Name(), dfi.Mode().String()) + } + if os.SameFile(sfi, dfi) { + return + } + } + err = copyFileContents(src, dst) + return +} + +// copyFileContents copies the contents of the file named src to the file named +// by dst. The file will be created if it does not already exist. If the +// destination file exists, all it's contents will be replaced by the contents +// of the source file. 
// copyFileContents streams the bytes of the file named src into the file
// named dst. dst is created when missing and truncated when present, so
// afterwards it holds exactly the contents of src. The first error met
// while opening, creating, copying, syncing or closing is returned.
func copyFileContents(src, dst string) (err error) {
	source, openErr := os.Open(src)
	if openErr != nil {
		return openErr
	}
	defer source.Close()

	target, createErr := os.Create(dst)
	if createErr != nil {
		return createErr
	}
	// Close runs last; its error is reported only if nothing failed
	// earlier, so the first failure is the one the caller sees.
	defer func() {
		if closeErr := target.Close(); err == nil {
			err = closeErr
		}
	}()

	if _, copyErr := io.Copy(target, source); copyErr != nil {
		return copyErr
	}
	return target.Sync()
}
// set assigns y to x through reflection without letting an incompatible
// assignment abort the caller. Nothing happens when x is not settable or y
// is the invalid Value. A panic raised by the assignment is swallowed when
// its value is a string mentioning "type mismatch" or "not assignable";
// any other panic is raised again unchanged.
func set(x, y reflect.Value) {
	if x.CanSet() && y.IsValid() {
		defer func() {
			cause := recover()
			if cause == nil {
				return
			}
			if msg, isText := cause.(string); isText {
				switch {
				case strings.Contains(msg, "type mismatch"),
					strings.Contains(msg, "not assignable"):
					// x cannot hold y - skip this rewrite silently
					return
				}
			}
			panic(cause)
		}()
		x.Set(y)
	}
}
// isWildcard reports whether s consists of exactly one rune and that rune
// is a lower-case letter. The empty string and multi-rune strings are not
// wildcards.
func isWildcard(s string) bool {
	first, width := utf8.DecodeRuneInString(s)
	if width != len(s) {
		// More than one rune.
		return false
	}
	return unicode.IsLower(first)
}
// subst returns a copy of pattern in which wildcard identifiers are
// replaced by the values recorded for them in m, and valid token positions
// are replaced by pos.
//
// An invalid pattern yields the invalid Value. When m is nil no wildcard
// substitution takes place. When pos is invalid, positions are copied as
// they are; a position that was invalid in the pattern is always kept
// as-is so that "no position" markers are not turned into real positions.
// Values substituted for a wildcard are themselves copied with m == nil and
// an invalid pos, i.e. verbatim.
func subst(m map[string]reflect.Value, pattern reflect.Value, pos reflect.Value) reflect.Value {
	if !pattern.IsValid() {
		return reflect.Value{}
	}

	// Wildcard gets replaced with map value.
	if m != nil && pattern.Type() == identType {
		name := pattern.Interface().(*ast.Ident).Name
		if isWildcard(name) {
			if old, ok := m[name]; ok {
				// Copy the captured value unchanged: no further wildcard
				// expansion and no position rewriting inside it.
				return subst(nil, old, reflect.Value{})
			}
		}
	}

	if pos.IsValid() && pattern.Type() == positionType {
		// use new position only if old position was valid in the first place
		if old := pattern.Interface().(token.Pos); !old.IsValid() {
			return pattern
		}
		return pos
	}

	// Otherwise copy.
	switch p := pattern; p.Kind() {
	case reflect.Slice:
		// Fresh slice of the same length, filled element by element.
		v := reflect.MakeSlice(p.Type(), p.Len(), p.Len())
		for i := 0; i < p.Len(); i++ {
			v.Index(i).Set(subst(m, p.Index(i), pos))
		}
		return v

	case reflect.Struct:
		// Fresh zero struct, filled field by field.
		v := reflect.New(p.Type()).Elem()
		for i := 0; i < p.NumField(); i++ {
			v.Field(i).Set(subst(m, p.Field(i), pos))
		}
		return v

	case reflect.Ptr:
		// A nil pointer stays nil; otherwise point at a copy of the pointee.
		v := reflect.New(p.Type()).Elem()
		if elem := p.Elem(); elem.IsValid() {
			v.Set(subst(m, elem, pos).Addr())
		}
		return v

	case reflect.Interface:
		// A nil interface stays nil; otherwise wrap a copy of the dynamic value.
		v := reflect.New(p.Type()).Elem()
		if elem := p.Elem(); elem.IsValid() {
			v.Set(subst(m, elem, pos))
		}
		return v
	}

	// Remaining kinds are returned without copying.
	return pattern
}