add NVMe drive info [model num, serial num, drive temp. etc.] (#10613)

* add NVMe drive info [model num, serial num, drive temp. etc.]
* Ignore fuse partitions
* Add the nvme logic only for linux
* Move smart/nvme structs to a separate file

Co-authored-by: wlan0 <sidharthamn@gmail.com>
This commit is contained in:
Shireesh Anjal 2020-10-04 22:48:46 +05:30 committed by GitHub
parent 017954e7ea
commit f1418a50f0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 767 additions and 8 deletions

View file

@ -1,4 +1,4 @@
// +build !freebsd,!netbsd,!openbsd,!solaris
// +build linux
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
@ -23,8 +23,10 @@ import (
"context"
"net/http"
"strings"
"syscall"
"github.com/minio/minio/pkg/madmin"
"github.com/minio/minio/pkg/smart"
diskhw "github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/host"
)
@ -68,7 +70,7 @@ func getLocalDiskHwOBD(ctx context.Context, r *http.Request) madmin.ServerDiskHw
addr = GetLocalPeer(globalEndpoints)
}
partitions, err := diskhw.PartitionsWithContext(ctx, true)
parts, err := diskhw.PartitionsWithContext(ctx, true)
if err != nil {
return madmin.ServerDiskHwOBDInfo{
Addr: addr,
@ -78,15 +80,41 @@ func getLocalDiskHwOBD(ctx context.Context, r *http.Request) madmin.ServerDiskHw
drives := []string{}
paths := []string{}
for _, partition := range partitions {
device := partition.Device
path := partition.Mountpoint
partitions := []madmin.PartitionStat{}
for _, part := range parts {
device := part.Device
path := part.Mountpoint
if strings.Index(device, "/dev/") == 0 {
if strings.Contains(device, "loop") {
continue
}
if strings.Contains(device, "/dev/fuse") {
continue
}
drives = append(drives, device)
paths = append(paths, path)
smartInfo, err := smart.GetInfo(device)
if err != nil {
if syscall.EACCES == err {
smartInfo.Error = err.Error()
} else {
return madmin.ServerDiskHwOBDInfo{
Addr: addr,
Error: err.Error(),
}
}
}
partition := madmin.PartitionStat{
Device: part.Device,
Mountpoint: part.Mountpoint,
Fstype: part.Fstype,
Opts: part.Opts,
SmartInfo: smartInfo,
}
partitions = append(partitions, partition)
}
}

View file

@ -1,4 +1,4 @@
// +build freebsd netbsd openbsd solaris
// +build !linux
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.

1
go.mod
View file

@ -20,6 +20,7 @@ require (
github.com/dchest/siphash v1.2.1
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/djherbis/atime v1.0.0
github.com/dswarbrick/smart v0.0.0-20190505152634-909a45200d6d
github.com/dustin/go-humanize v1.0.0
github.com/eclipse/paho.mqtt.golang v1.2.0
github.com/elazarl/go-bindata-assetfs v1.0.0

2
go.sum
View file

@ -85,6 +85,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumC
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/djherbis/atime v1.0.0 h1:ySLvBAM0EvOGaX7TI4dAM5lWj+RdJUCKtGSEHN8SGBg=
github.com/djherbis/atime v1.0.0/go.mod h1:5W+KBIuTwVGcqjIfaTwt+KSYX1o6uep8dtevevQP/f8=
github.com/dswarbrick/smart v0.0.0-20190505152634-909a45200d6d h1:QK8IYltsNy+5QZcDFbVkyInrs98/wHy1tfUTGG91sps=
github.com/dswarbrick/smart v0.0.0-20190505152634-909a45200d6d/go.mod h1:apXo4PA/BgBPrt66j0N45O2stlBTRowdip2igwcUWVc=
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=

View file

@ -1,4 +1,4 @@
// +build !freebsd
// +build linux
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
@ -20,6 +20,7 @@
package madmin
import (
smart "github.com/minio/minio/pkg/smart"
diskhw "github.com/shirou/gopsutil/disk"
)
@ -27,7 +28,16 @@ import (
type ServerDiskHwOBDInfo struct {
Addr string `json:"addr"`
Usage []*diskhw.UsageStat `json:"usages,omitempty"`
Partitions []diskhw.PartitionStat `json:"partitions,omitempty"`
Partitions []PartitionStat `json:"partitions,omitempty"`
Counters map[string]diskhw.IOCountersStat `json:"counters,omitempty"`
Error string `json:"error,omitempty"`
}
// PartitionStat describes one mounted partition. It carries the Device,
// Mountpoint, Fstype and Opts values copied from gopsutil's disk.PartitionStat
// (imported in this file as diskhw) together with the SMART data gathered for
// the same device by the smart package.
type PartitionStat struct {
	Device     string `json:"device"`
	Mountpoint string `json:"mountpoint,omitempty"`
	Fstype     string `json:"fstype,omitempty"`
	Opts       string `json:"opts,omitempty"`
	// SmartInfo is a struct value, so encoding/json never treats it as empty;
	// the omitempty option has no effect and the key is always emitted.
	SmartInfo smart.Info `json:"smartInfo,omitempty"`
}

View file

@ -1,3 +1,5 @@
// +build !linux
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*

235
pkg/smart/bindata.go Normal file

File diff suppressed because one or more lines are too long

89
pkg/smart/nvme.go Normal file
View file

@ -0,0 +1,89 @@
// +build linux
/*
* MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This file has been adopted and then modified from Daniel Swarbrick's smart
* project residing at https://github.com/dswarbrick/smart
*
*/
package smart
import (
"fmt"
"math/big"
"unsafe"
"github.com/dswarbrick/smart/ioctl"
"golang.org/x/sys/unix"
)
// NVMe admin disk query constants. Each value is placed in the opcode field of
// an nvmePassthruCommand before it is handed to the driver.
const (
	// NvmeAdminGetLogPage is the opcode readLogPage uses to fetch a log page.
	NvmeAdminGetLogPage = 0x02
	// NvmeAdminIdentify is the opcode getNvmeInfo uses to identify the controller.
	NvmeAdminIdentify = 0x06
)
var (
	// nvmeIoctlAdminCmd is the ioctl request number used to submit an
	// nvmePassthruCommand: a read/write request of type 'N', number 0x41,
	// sized to the command struct.
	// NOTE(review): presumably equal to NVME_IOCTL_ADMIN_CMD from
	// <linux/nvme_ioctl.h> — confirm against the kernel header.
	nvmeIoctlAdminCmd = ioctl.Iowr('N', 0x41, unsafe.Sizeof(nvmePassthruCommand{}))
)
// NewNVMeDevice returns an NVMeDevice for the device file called name. The
// device is not opened; its descriptor starts out as -1 until Open succeeds.
func NewNVMeDevice(name string) *NVMeDevice {
	dev := new(NVMeDevice)
	dev.Name = name
	dev.fd = -1
	return dev
}
// Open opens the device file named by d.Name for reading and writing and
// stores the resulting descriptor on d. Whatever unix.Open reports is returned
// unchanged; the descriptor it returned is stored even when the call fails.
func (d *NVMeDevice) Open() (err error) {
	fd, openErr := unix.Open(d.Name, unix.O_RDWR, 0600)
	d.fd = fd
	return openErr
}
// Close closes the descriptor obtained by Open and returns the error reported
// by the close call.
//
// The stored descriptor is reset to -1 before returning, so calling Close a
// second time cannot close an unrelated file that has since been assigned the
// same descriptor number.
func (d *NVMeDevice) Close() error {
	fd := d.fd
	d.fd = -1
	return unix.Close(fd)
}
// readLogPage submits a Get Log Page admin command for logID on the opened
// device and asks the driver to place the page into the slice buf points at.
//
// The slice length must be a multiple of 4 between 4 and 0x4000 bytes
// inclusive; any other length is rejected before the ioctl is issued. The
// error from the ioctl is returned unchanged.
func (d *NVMeDevice) readLogPage(logID uint8, buf *[]byte) error {
	page := *buf
	size := len(page)
	switch {
	case size < 4, size > 0x4000, size%4 != 0:
		return fmt.Errorf("Invalid buffer size")
	}

	// cdw10 holds the log identifier in its low bits and the buffer size as a
	// zero-based count of 4-byte words starting at bit 16.
	words := uint32(size) / 4
	request := nvmePassthruCommand{
		opcode:  NvmeAdminGetLogPage,
		nsid:    0xffffffff, // FIXME
		addr:    uint64(uintptr(unsafe.Pointer(&page[0]))),
		dataLen: uint32(size),
		cdw10:   uint32(logID) | ((words - 1) << 16),
	}

	handle := uintptr(d.fd)
	return ioctl.Ioctl(handle, nvmeIoctlAdminCmd, uintptr(unsafe.Pointer(&request)))
}
// le128ToBigInt interprets buf as an unsigned 128-bit little-endian integer
// and returns its value as a newly allocated *big.Int.
func le128ToBigInt(buf [16]byte) *big.Int {
	// big.Int.SetBytes wants the most significant byte first. buf is passed by
	// value, so it can be reversed in place without touching the caller's array.
	for lo, hi := 0, len(buf)-1; lo < hi; lo, hi = lo+1, hi-1 {
		buf[lo], buf[hi] = buf[hi], buf[lo]
	}
	return new(big.Int).SetBytes(buf[:])
}

153
pkg/smart/smart.go Normal file
View file

@ -0,0 +1,153 @@
// +build linux
/*
* MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package smart
import (
"bytes"
"encoding/binary"
"fmt"
"regexp"
"strings"
"unsafe"
"github.com/dswarbrick/smart/drivedb"
"github.com/dswarbrick/smart/ioctl"
"github.com/dswarbrick/smart/scsi"
"github.com/dswarbrick/smart/utils"
"gopkg.in/yaml.v2"
)
// GetInfo collects S.M.A.R.T. information about the block device at the given
// path.
//
// The embedded drive database ("drivedb.yaml") is decoded first; a decode
// failure is returned as an error. Devices whose path starts with "/dev/nvme"
// are then opened and queried through the NVMe admin ioctl, filling info.Nvme.
// Every other device is opened through SCSI autodetection, filling info.Scsi
// or info.Ata depending on the detected device type.
//
// On every return the Info carries at least the Device field. When the error
// is non-nil the remaining fields are unset.
func GetInfo(device string) (Info, error) {
	info := Info{
		Device: device,
	}

	var db drivedb.DriveDb
	dec := yaml.NewDecoder(bytes.NewBuffer(MustAsset("drivedb.yaml")))
	err := dec.Decode(&db)
	if err != nil {
		return info, err
	}

	for i, d := range db.Drives {
		// NOTE(review): a pattern that fails to compile is dropped silently
		// and leaves CompiledRegexp nil for that entry — confirm that this
		// best-effort behaviour is intended.
		db.Drives[i].CompiledRegexp, _ = regexp.Compile(d.ModelRegex)
	}

	if strings.HasPrefix(device, "/dev/nvme") {
		d := NewNVMeDevice(device)
		if err := d.Open(); err != nil {
			return info, err
		}
		// Release the descriptor on every path out of this branch; without
		// this each call leaks one open file. The close error is ignored
		// because the device is only queried, never written to.
		defer d.Close()

		nvmeInfo, err := getNvmeInfo(d)
		if err != nil {
			return info, err
		}

		info.Nvme = nvmeInfo
		return info, nil
	}

	// NOTE(review): the handle returned by OpenSCSIAutodetect is never
	// released here — check whether scsi.Device offers a Close method and
	// call it if so.
	d, err := scsi.OpenSCSIAutodetect(device)
	if err != nil {
		return info, err
	}

	switch dev := d.(type) {
	case *scsi.SCSIDevice:
		scsiInfo, err := getScsiInfo(dev)
		if err != nil {
			return info, err
		}
		info.Scsi = scsiInfo
	case *scsi.SATDevice:
		ataInfo, err := getAtaInfo(dev)
		if err != nil {
			return info, err
		}
		info.Ata = ataInfo
	}

	return info, nil
}
// getNvmeInfo queries an opened NVMe device and returns its identity and
// health data.
//
// Two admin commands are issued: Identify (controller), which provides the
// vendor id, model, serial number, firmware revision and maximum transfer
// size, and Get Log Page 0x02, which provides the SMART counters. The first
// failing ioctl or decode step aborts the query; the error is returned
// together with whatever had been filled in up to that point.
func getNvmeInfo(d *NVMeDevice) (*NvmeInfo, error) {
	nvmeInfo := &NvmeInfo{}

	buf := make([]byte, 4096)
	cmd := nvmePassthruCommand{
		opcode:  NvmeAdminIdentify,
		nsid:    0, // Namespace 0, since we are identifying the controller
		addr:    uint64(uintptr(unsafe.Pointer(&buf[0]))),
		dataLen: uint32(len(buf)),
		cdw10:   1, // Identify controller
	}

	if err := ioctl.Ioctl(uintptr(d.fd), nvmeIoctlAdminCmd, uintptr(unsafe.Pointer(&cmd))); err != nil {
		return nvmeInfo, err
	}

	var controller nvmeIdentController
	if err := binary.Read(bytes.NewBuffer(buf), utils.NativeEndian, &controller); err != nil {
		return nvmeInfo, err
	}

	// The identity strings are fixed-width byte arrays; strip the padding
	// whitespace around them.
	nvmeInfo.VendorID = fmt.Sprintf("%#04x", controller.VendorID)
	nvmeInfo.ModelNum = strings.TrimSpace(string(controller.ModelNumber[:]))
	nvmeInfo.SerialNum = strings.TrimSpace(string(controller.SerialNumber[:]))
	nvmeInfo.FirmwareVersion = strings.TrimSpace(string(controller.Firmware[:]))
	nvmeInfo.MaxDataTransferPages = 1 << controller.Mdts

	buf2 := make([]byte, 512)

	// Read SMART log
	if err := d.readLogPage(0x02, &buf2); err != nil {
		return nvmeInfo, err
	}

	var sl nvmeSMARTLog
	if err := binary.Read(bytes.NewBuffer(buf2), utils.NativeEndian, &sl); err != nil {
		return nvmeInfo, err
	}

	// The temperature is a 16-bit little-endian value in Kelvin. Convert in
	// signed arithmetic: subtracting 273 from a uint16 wraps around for any
	// reading below 273 (for example 0 would be shown as 65263).
	kelvin := int(sl.Temperature[1])<<8 | int(sl.Temperature[0])

	nvmeInfo.CriticalWarning = fmt.Sprintf("%x", sl.CritWarning)
	nvmeInfo.Temperature = fmt.Sprintf("%d Celsius", kelvin-273)
	nvmeInfo.SpareAvailable = fmt.Sprintf("%d%%", sl.AvailSpare)
	nvmeInfo.SpareThreshold = fmt.Sprintf("%d%%", sl.SpareThresh)

	// NOTE(review): these two fields are named "...Bytes" but receive the raw
	// data-unit counters from the log page — confirm the unit against the NVMe
	// specification and scale if bytes are really intended.
	nvmeInfo.DataUnitsReadBytes = le128ToBigInt(sl.DataUnitsRead)
	nvmeInfo.DataUnitsWrittenBytes = le128ToBigInt(sl.DataUnitsWritten)

	nvmeInfo.HostReadCommands = le128ToBigInt(sl.HostReads)
	nvmeInfo.HostWriteCommands = le128ToBigInt(sl.HostWrites)
	nvmeInfo.ControllerBusyTime = le128ToBigInt(sl.CtrlBusyTime)
	nvmeInfo.PowerCycles = le128ToBigInt(sl.PowerCycles)
	nvmeInfo.PowerOnHours = le128ToBigInt(sl.PowerOnHours)
	nvmeInfo.UnsafeShutdowns = le128ToBigInt(sl.UnsafeShutdowns)
	nvmeInfo.MediaAndDataIntegrityErrors = le128ToBigInt(sl.MediaErrors)

	return nvmeInfo, nil
}
// getScsiInfo is a placeholder: it does not query d yet and always returns an
// empty ScsiInfo with a nil error.
func getScsiInfo(d *scsi.SCSIDevice) (*ScsiInfo, error) {
	return new(ScsiInfo), nil
}
// getAtaInfo is a placeholder: it does not query d yet and always returns an
// empty AtaInfo with a nil error.
func getAtaInfo(d *scsi.SATDevice) (*AtaInfo, error) {
	return new(AtaInfo), nil
}

239
pkg/smart/types.go Normal file
View file

@ -0,0 +1,239 @@
/*
* MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package smart
import "math/big"
// nvmePassthruCommand is the command block handed to the NVMe driver through
// the admin ioctl. Its size feeds into the ioctl request number, and a pointer
// to it is passed straight to the kernel, so field order and widths must not
// be changed.
//
// Defined in <linux/nvme_ioctl.h>
//nolint:structcheck
type nvmePassthruCommand struct {
	opcode      uint8 // admin opcode, e.g. NvmeAdminIdentify or NvmeAdminGetLogPage
	flags       uint8
	rsvd1       uint16
	nsid        uint32 // namespace the command addresses
	cdw2        uint32
	cdw3        uint32
	metadata    uint64
	addr        uint64 // address of the caller's data buffer
	metadataLen uint32
	dataLen     uint32 // length of the data buffer in bytes
	cdw10       uint32 // command-specific word; the callers in this package set only this one
	cdw11       uint32
	cdw12       uint32
	cdw13       uint32
	cdw14       uint32
	cdw15       uint32
	timeoutMS   uint32
	result      uint32
} // 72 bytes
// nvmeIdentPowerState is one entry of the Psd array inside
// nvmeIdentController. Its fields add up to 32 bytes; the layout is decoded
// positionally, so order and widths must stay as they are.
type nvmeIdentPowerState struct {
	MaxPower        uint16 // Centiwatts
	Rsvd2           uint8
	Flags           uint8
	EntryLat        uint32 // Microseconds
	ExitLat         uint32 // Microseconds
	ReadTput        uint8
	ReadLat         uint8
	WriteTput       uint8
	WriteLat        uint8
	IdlePower       uint16
	IdleScale       uint8
	Rsvd19          uint8
	ActivePower     uint16
	ActiveWorkScale uint8
	Rsvd23          [9]byte
}
// nvmeIdentController mirrors the 4096-byte buffer returned by the Identify
// (controller) admin command. getNvmeInfo decodes the raw buffer into this
// struct with binary.Read, which fills fields strictly in declaration order
// without padding, so no field may be reordered or resized. The RsvdN fields
// are named after their byte offset and only keep later fields in place.
type nvmeIdentController struct {
	VendorID     uint16                  // PCI Vendor ID
	Ssvid        uint16                  // PCI Subsystem Vendor ID
	SerialNumber [20]byte                // Serial Number
	ModelNumber  [40]byte                // Model Number
	Firmware     [8]byte                 // Firmware Revision
	Rab          uint8                   // Recommended Arbitration Burst
	IEEE         [3]byte                 // IEEE OUI Identifier
	Cmic         uint8                   // Controller Multi-Path I/O and Namespace Sharing Capabilities
	Mdts         uint8                   // Maximum Data Transfer Size
	Cntlid       uint16                  // Controller ID
	Ver          uint32                  // Version
	Rtd3r        uint32                  // RTD3 Resume Latency
	Rtd3e        uint32                  // RTD3 Entry Latency
	Oaes         uint32                  // Optional Asynchronous Events Supported
	Rsvd96       [160]byte               // ...
	Oacs         uint16                  // Optional Admin Command Support
	ACL          uint8                   // Abort Command Limit
	Aerl         uint8                   // Asynchronous Event Request Limit
	Frmw         uint8                   // Firmware Updates
	Lpa          uint8                   // Log Page Attributes
	Elpe         uint8                   // Error Log Page Entries
	Npss         uint8                   // Number of Power States Support
	Avscc        uint8                   // Admin Vendor Specific Command Configuration
	Apsta        uint8                   // Autonomous Power State Transition Attributes
	Wctemp       uint16                  // Warning Composite Temperature Threshold
	Cctemp       uint16                  // Critical Composite Temperature Threshold
	Mtfa         uint16                  // Maximum Time for Firmware Activation
	Hmpre        uint32                  // Host Memory Buffer Preferred Size
	Hmmin        uint32                  // Host Memory Buffer Minimum Size
	Tnvmcap      [16]byte                // Total NVM Capacity
	Unvmcap      [16]byte                // Unallocated NVM Capacity
	Rpmbs        uint32                  // Replay Protected Memory Block Support
	Rsvd316      [196]byte               // ...
	Sqes         uint8                   // Submission Queue Entry Size
	Cqes         uint8                   // Completion Queue Entry Size
	Rsvd514      [2]byte                 // (defined in NVMe 1.3 spec)
	Nn           uint32                  // Number of Namespaces
	Oncs         uint16                  // Optional NVM Command Support
	Fuses        uint16                  // Fused Operation Support
	Fna          uint8                   // Format NVM Attributes
	Vwc          uint8                   // Volatile Write Cache
	Awun         uint16                  // Atomic Write Unit Normal
	Awupf        uint16                  // Atomic Write Unit Power Fail
	Nvscc        uint8                   // NVM Vendor Specific Command Configuration
	Rsvd531      uint8                   // ...
	Acwu         uint16                  // Atomic Compare & Write Unit
	Rsvd534      [2]byte                 // ...
	Sgls         uint32                  // SGL Support
	Rsvd540      [1508]byte              // ...
	Psd          [32]nvmeIdentPowerState // Power State Descriptors
	Vs           [1024]byte              // Vendor Specific
} // 4096 bytes
// nvmeLBAF is the 4-byte element type of the Lbaf array inside
// nvmeIdentNamespace.
// NOTE(review): presumably an LBA format descriptor (metadata size, data size
// exponent, relative performance) — confirm against the NVMe specification.
type nvmeLBAF struct {
	Ms uint16
	Ds uint8
	Rp uint8
}
// nvmeIdentNamespace is a fixed-layout record whose fields add up to 4096
// bytes. Nothing in the visible code of this package references it yet, which
// is why the deadcode linter is silenced. The RsvdN fields are named after
// their byte offset and only keep later fields in place.
// NOTE(review): presumably the result of the Identify (namespace) admin
// command — confirm against the NVMe specification.
//nolint:deadcode
type nvmeIdentNamespace struct {
	Nsze    uint64
	Ncap    uint64
	Nuse    uint64
	Nsfeat  uint8
	Nlbaf   uint8
	Flbas   uint8
	Mc      uint8
	Dpc     uint8
	Dps     uint8
	Nmic    uint8
	Rescap  uint8
	Fpi     uint8
	Rsvd33  uint8
	Nawun   uint16
	Nawupf  uint16
	Nacwu   uint16
	Nabsn   uint16
	Nabo    uint16
	Nabspf  uint16
	Rsvd46  [2]byte
	Nvmcap  [16]byte
	Rsvd64  [40]byte
	Nguid   [16]byte
	EUI64   [8]byte
	Lbaf    [16]nvmeLBAF
	Rsvd192 [192]byte
	Vs      [3712]byte
} // 4096 bytes
// nvmeSMARTLog mirrors the 512-byte log page that getNvmeInfo reads with log
// id 0x02. The raw page is decoded with binary.Read, which fills fields
// strictly in declaration order without padding, so no field may be reordered
// or resized.
type nvmeSMARTLog struct {
	CritWarning uint8
	// Temperature is a 16-bit little-endian value in Kelvin (low byte first).
	Temperature [2]uint8
	AvailSpare  uint8 // reported by getNvmeInfo as a percentage
	SpareThresh uint8 // reported by getNvmeInfo as a percentage
	PercentUsed uint8
	Rsvd6       [26]byte
	// The 16-byte counters below are 128-bit little-endian integers; they are
	// converted with le128ToBigInt.
	DataUnitsRead    [16]byte
	DataUnitsWritten [16]byte
	HostReads        [16]byte
	HostWrites       [16]byte
	CtrlBusyTime     [16]byte
	PowerCycles      [16]byte
	PowerOnHours     [16]byte
	UnsafeShutdowns  [16]byte
	MediaErrors      [16]byte
	NumErrLogEntries [16]byte
	WarningTempTime  uint32
	CritCompTime     uint32
	TempSensor       [8]uint16
	Rsvd216          [296]byte
} // 512 bytes
// NVMeDevice is a handle on one NVMe device file. Create it with
// NewNVMeDevice, call Open before issuing any query and Close when done.
type NVMeDevice struct {
	// Name is the path of the device file that Open opens.
	Name string
	// fd is the descriptor stored by Open; NewNVMeDevice initialises it to -1.
	fd int
}
// Info contains S.M.A.R.T data about the drive. GetInfo always sets Device and
// fills at most one of Scsi, Nvme and Ata, depending on the kind of device it
// detected; the others stay nil and are left out of the JSON output.
type Info struct {
	Device string    `json:"device"`
	Scsi   *ScsiInfo `json:"scsi,omitempty"`
	Nvme   *NvmeInfo `json:"nvme,omitempty"`
	Ata    *AtaInfo  `json:"ata,omitempty"`
	// Error is not set by GetInfo itself; it is available for callers to
	// record why the data could not be collected.
	Error string `json:"error,omitempty"`
}
// AtaInfo contains ATA drive info. getAtaInfo currently returns it empty, so
// none of these fields are populated yet.
// NOTE(review): the JSON keys mix naming styles ("RotationRate",
// "MajorVersion" and "MinorVersion" are capitalised, the rest are camelCase) —
// confirm with consumers before normalising, since the keys are wire format.
type AtaInfo struct {
	LUWWNDeviceID         string `json:"scsiLuWWNDeviceID,omitempty"`
	SerialNum             string `json:"serialNum,omitempty"`
	ModelNum              string `json:"modelNum,omitempty"`
	FirmwareRevision      string `json:"firmwareRevision,omitempty"`
	RotationRate          string `json:"RotationRate,omitempty"`
	ATAMajorVersion       string `json:"MajorVersion,omitempty"`
	ATAMinorVersion       string `json:"MinorVersion,omitempty"`
	SmartSupportAvailable bool   `json:"smartSupportAvailable,omitempty"`
	SmartSupportEnabled   bool   `json:"smartSupportEnabled,omitempty"`
	ErrorLog              string `json:"smartErrorLog,omitempty"`
	Transport             string `json:"transport,omitempty"`
}
// ScsiInfo contains SCSI drive Info. getScsiInfo currently returns it empty,
// so none of these fields are populated yet.
// NOTE(review): the keys "scsirespLen" and "sciRpm" break the "scsi" +
// CamelCase pattern of their neighbours and look like typos — confirm with
// consumers before renaming, since the keys are wire format.
type ScsiInfo struct {
	CapacityBytes int64  `json:"scsiCapacityBytes,omitempty"`
	ModeSenseBuf  string `json:"scsiModeSenseBuf,omitempty"`
	RespLen       int64  `json:"scsirespLen,omitempty"`
	BdLen         int64  `json:"scsiBdLen,omitempty"`
	Offset        int64  `json:"scsiOffset,omitempty"`
	RPM           int64  `json:"sciRpm,omitempty"`
}
// NvmeInfo contains NVMe drive info as filled in by getNvmeInfo. The string
// fields hold preformatted text (hexadecimal vendor id, "<n>%" spare values,
// "<n> Celsius" temperature); the *big.Int fields hold the 128-bit counters
// read from the device's SMART log.
type NvmeInfo struct {
	SerialNum       string `json:"serialNum,omitempty"`
	VendorID        string `json:"vendorId,omitempty"`
	FirmwareVersion string `json:"firmwareVersion,omitempty"`
	ModelNum        string `json:"modelNum,omitempty"`
	SpareAvailable  string `json:"spareAvailable,omitempty"`
	SpareThreshold  string `json:"spareThreshold,omitempty"`
	Temperature     string `json:"temperature,omitempty"`
	CriticalWarning string `json:"criticalWarning,omitempty"`

	// MaxDataTransferPages is 1 shifted left by the controller's Mdts value.
	MaxDataTransferPages int `json:"maxDataTransferPages,omitempty"`

	ControllerBusyTime *big.Int `json:"controllerBusyTime,omitempty"`
	PowerOnHours       *big.Int `json:"powerOnHours,omitempty"`
	PowerCycles        *big.Int `json:"powerCycles,omitempty"`
	UnsafeShutdowns    *big.Int `json:"unsafeShutdowns,omitempty"`
	// NOTE(review): the JSON key below misspells "Integrity" as "Intgerity" —
	// confirm with consumers before correcting, since the key is wire format.
	MediaAndDataIntegrityErrors *big.Int `json:"mediaAndDataIntgerityErrors,omitempty"`
	DataUnitsReadBytes          *big.Int `json:"dataUnitsReadBytes,omitempty"`
	DataUnitsWrittenBytes       *big.Int `json:"dataUnitsWrittenBytes,omitempty"`
	HostReadCommands            *big.Int `json:"hostReadCommands,omitempty"`
	HostWriteCommands           *big.Int `json:"hostWriteCommands,omitempty"`
}