sys/stats: return cgroup mem limit, fall back to sysinfo() (#4002)

This is necessary where in certain environments where
cgroup is used to limit memory usage of a container or
a particular process.

GetStats() is used by caching module to figure out the
optimal cacheable size in memory with cgroup limits
what sysinfo reports might not be the right value set
for a given process.

Fixes #4001
This commit is contained in:
Harshavardhana 2017-04-02 10:46:16 -07:00 committed by GitHub
parent e31e2c3bc2
commit 3bf67668b6
5 changed files with 396 additions and 9 deletions

177
pkg/cgroup/linux.go Normal file
View file

@ -0,0 +1,177 @@
// +build linux
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Package cgroup implements parsing for all the cgroup
// categories and functionality in a simple way.
package cgroup
import (
"bufio"
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
)
// DO NOT EDIT following constants are chosen defaults for any kernel
// after 3.x, please open a github issue https://github.com/minio/minio/issues
// and discuss first if you wish to change this.
const (
// Default string for looking for kernel memory param.
memoryLimitKernelParam = "memory.limit_in_bytes"
// Points to sys path memory path.
cgroupMemSysPath = "/sys/fs/cgroup/memory"
// Default docker prefix.
dockerPrefixName = "/docker/"
// Proc controller group path.
cgroupFileTemplate = "/proc/%d/cgroup"
)
// CGEntries - represents all the entries in a process cgroup file
// at /proc/<pid>/cgroup as key value pairs.
type CGEntries map[string]string
// GetEntries reads and parses all the cgroup entries for a given process.
func GetEntries(pid int) (CGEntries, error) {
r, err := os.Open(fmt.Sprintf(cgroupFileTemplate, pid))
if err != nil {
return nil, err
}
defer r.Close()
return parseProcCGroup(r)
}
// parseProcCGroup - cgroups are always in the following
// format once enabled you need to know the pid of the
// application you are looking for so that the the
// following parsing logic only parses the file located
// at /proc/<pid>/cgroup.
//
// CGROUP entries id, component and path are always in
// the following format. ``ID:COMPONENT:PATH``
//
// Following code block parses this information and
// returns a procCGroup which is a parsed list of all
// the line by line entires from /proc/<pid>/cgroup.
func parseProcCGroup(r io.Reader) (CGEntries, error) {
var cgEntries = CGEntries{}
// Start reading cgroup categories line by line
// and process them into procCGroup structure.
scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := scanner.Text()
tokens := strings.SplitN(line, ":", 3)
if len(tokens) < 3 {
continue
}
name, path := tokens[1], tokens[2]
for _, token := range strings.Split(name, ",") {
name = strings.TrimPrefix(token, "name=")
cgEntries[name] = path
}
}
// Return upon any error while reading the cgroup categories.
if err := scanner.Err(); err != nil {
return nil, err
}
return cgEntries, nil
}
// Fetch value of the cgroup kernel param from the cgroup manager,
// if cgroup manager is configured we should just rely on `cgm` cli
// to fetch all the values for us.
func getManagerKernValue(cname, path, kernParam string) (limit uint64, err error) {
cmd := exec.Command("cgm", "getvalue", cname, path, kernParam)
var out bytes.Buffer
cmd.Stdout = &out
if err = cmd.Run(); err != nil {
return 0, err
}
// Parse the cgm output.
limit, err = strconv.ParseUint(strings.TrimSpace(out.String()), 10, 64)
return limit, err
}
// Get cgroup memory limit file path.
func getMemoryLimitFilePath(cgPath string) string {
path := cgroupMemSysPath
// Docker generates weird cgroup paths that don't
// really exist on the file system.
//
// For example on regular Linux OS :
// `/user.slice/user-1000.slice/session-1.scope`
//
// But they exist as a bind mount on Docker and
// are not accessible : `/docker/<hash>`
//
// We we will just ignore if there is `/docker` in the
// path ignore and fall back to :
// `/sys/fs/cgroup/memory/memory.limit_in_bytes`
if !strings.HasPrefix(cgPath, dockerPrefixName) {
path = filepath.Join(path, cgPath)
}
// Final path.
return filepath.Join(path, memoryLimitKernelParam)
}
// GetMemoryLimit - Fetches cgroup memory limit either from
// a file path at '/sys/fs/cgroup/memory', if path fails then
// fallback to querying cgroup manager.
func GetMemoryLimit(pid int) (limit uint64, err error) {
var cg CGEntries
cg, err = GetEntries(pid)
if err != nil {
return 0, err
}
path := cg["memory"]
limit, err = getManagerKernValue("memory", path, memoryLimitKernelParam)
if err != nil {
// Upon any failure returned from `cgm`, on some systems cgm
// might not be installed. We fallback to using the the sysfs
// path instead to lookup memory limits.
var b []byte
b, err = ioutil.ReadFile(getMemoryLimitFilePath(path))
if err != nil {
return 0, err
}
limit, err = strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64)
}
return limit, err
}

140
pkg/cgroup/linux_test.go Normal file
View file

@ -0,0 +1,140 @@
// +build linux
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cgroup
import (
"io/ioutil"
"os"
"testing"
)
// Testing parsing correctness for various process cgroup files.
func TestProcCGroup(t *testing.T) {
tmpPath, err := ioutil.TempFile("", "cgroup")
if err != nil {
t.Fatal(err)
}
defer os.Remove(tmpPath.Name())
cgroup := `
11:memory:/user.slice
10:blkio:/user.slice
9:hugetlb:/
8:net_cls,net_prio:/
7:perf_event:/
6:pids:/user.slice/user-1000.slice
5:devices:/user.slice
4:cpuset:/
3:cpu,cpuacct:/user.slice
2:freezer:/
1:name=systemd:/user.slice/user-1000.slice/session-1.scope
`
_, err = tmpPath.WriteString(cgroup)
if err != nil {
t.Fatal(err)
}
// Seek back to read from the beginning.
tmpPath.Seek(0, 0)
cg, err := parseProcCGroup(tmpPath)
if err != nil {
t.Fatal(err)
}
path := cg["memory"]
if len(path) == 0 {
t.Fatal("Path component cannot be empty")
}
if path != "/user.slice" {
t.Fatal("Path component cannot be empty")
}
path = cg["systemd"]
if path != "/user.slice/user-1000.slice/session-1.scope" {
t.Fatal("Path component cannot be empty")
}
// Mixed cgroups with different group names.
cgroup = `
11:memory:/newtest/newtest
10:blkio:/user.slice
9:hugetlb:/
8:net_cls,net_prio:/
7:perf_event:/
6:pids:/user.slice/user-1000.slice
5:devices:/user.slice
4:cpuset:/
3:cpu,cpuacct:/newtest/newtest
2:freezer:/
1:name=systemd:/user.slice/user-1000.slice/session-1.scope
`
// Seek back to read from the beginning.
tmpPath.Seek(0, 0)
_, err = tmpPath.WriteString(cgroup)
if err != nil {
t.Fatal(err)
}
// Seek back to read from the beginning.
tmpPath.Seek(0, 0)
cg, err = parseProcCGroup(tmpPath)
if err != nil {
t.Fatal(err)
}
path = cg["memory"]
if path != "/newtest/newtest" {
t.Fatal("Path component cannot be empty")
}
path = cg["systemd"]
if path != "/user.slice/user-1000.slice/session-1.scope" {
t.Fatal("Path component cannot be empty")
}
}
// Tests cgroup memory limit path construction.
func TestMemoryLimitPath(t *testing.T) {
testCases := []struct {
cgroupPath string
expectedPath string
}{
{
cgroupPath: "/user.slice",
expectedPath: "/sys/fs/cgroup/memory/user.slice/memory.limit_in_bytes",
},
{
cgroupPath: "/docker/testing",
expectedPath: "/sys/fs/cgroup/memory/memory.limit_in_bytes",
},
}
for i, testCase := range testCases {
actualPath := getMemoryLimitFilePath(testCase.cgroupPath)
if actualPath != testCase.expectedPath {
t.Fatalf("Test: %d: Expected: %s, got %s", i+1, testCase.expectedPath, actualPath)
}
}
}

19
pkg/cgroup/others.go Normal file
View file

@ -0,0 +1,19 @@
// +build !linux
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cgroup

View file

@ -6,7 +6,7 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*shouldP
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@ -18,14 +18,65 @@
package sys
import "syscall"
import (
"os"
"syscall"
// GetStats - return system statistics.
func GetStats() (stats Stats, err error) {
var si syscall.Sysinfo_t
if err = syscall.Sysinfo(&si); err == nil {
stats.TotalRAM = uint64(si.Totalram)
"github.com/minio/minio/pkg/cgroup"
)
// Get the final system memory limit chosen by the user.
// by default without any configuration on a vanilla Linux
// system you would see physical RAM limit. If cgroup
// is configured at some point in time this function
// would return the memory limit chosen for the given pid.
func getMemoryLimit() (sysLimit uint64, err error) {
if sysLimit, err = getSysinfoMemoryLimit(); err != nil {
// Physical memory info is not accessible, just exit here.
return 0, err
}
return stats, err
// Following code is deliberately ignoring the error.
cGroupLimit, gerr := cgroup.GetMemoryLimit(os.Getpid())
if gerr != nil {
// Upon error just return system limit.
return sysLimit, nil
}
// cgroup limit is lesser than system limit means
// user wants to limit the memory usage further
// treat cgroup limit as the system limit.
if cGroupLimit <= sysLimit {
sysLimit = cGroupLimit
}
// Final system limit.
return sysLimit, nil
}
// Get physical RAM size of the node.
func getSysinfoMemoryLimit() (limit uint64, err error) {
var si syscall.Sysinfo_t
if err = syscall.Sysinfo(&si); err != nil {
return 0, err
}
// Total RAM is always the multiplicative value
// of unit size and total ram.
limit = uint64(si.Unit) * si.Totalram
return limit, nil
}
// GetStats - return system statistics, currently only
// supported value is TotalRAM.
func GetStats() (stats Stats, err error) {
var limit uint64
limit, err = getMemoryLimit()
if err != nil {
return Stats{}, err
}
stats.TotalRAM = limit
return stats, nil
}

View file

@ -1,5 +1,5 @@
/*
* Minio Cloud Storage, (C) 2016,2017 Minio, Inc.
* Minio Cloud Storage, (C) 2016, 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.