Merge branch '2.0'

Signed-off-by: Felix Abecassis <fabecassis@nvidia.com>
Felix Abecassis 2017-11-13 21:10:52 -08:00
commit fe1874942b
75 changed files with 379 additions and 4343 deletions

.gitignore (vendored, 2 lines changed)

@ -6,6 +6,4 @@
.#*
*~
#*
bin
dist
samples

CLA (deleted, 160 lines)

@ -1,160 +0,0 @@
The NVIDIA Docker
Software Grant and Corporate Contributor License Agreement ("Agreement")
Thank you for your interest in the NVIDIA Docker Project (the
"Project"). In order to clarify the intellectual property license
granted with Contributions from any person or entity, NVIDIA
Corporation (the "Copyright Holders") must have a Contributor License
Agreement (CLA) on file that has been signed by each Contributor,
indicating agreement to the license terms below. This license is
for your protection as a Contributor as well as the protection of the
Project and its users; it does not change your rights to use your own
Contributions for any other purpose.
This version of the Agreement allows an entity (the "Corporation") to
submit Contributions to the Project, to authorize Contributions
submitted by its designated employees to the Project, and to grant
copyright and patent licenses thereto to the Copyright Holders.
If you have not already done so, please complete and sign, then scan and
email a pdf file of this Agreement to digits@nvidia.com.
Please read this document carefully before signing and keep a copy for
your records.
Corporation name: ________________________________________________
Corporation address: ________________________________________________
________________________________________________
________________________________________________
Point of Contact: ________________________________________________
E-Mail: ________________________________________________
Telephone: _____________________ Fax: _____________________
You accept and agree to the following terms and conditions for Your
present and future Contributions submitted to the Project. In
return, the Copyright Holders shall not use Your Contributions in a way
that is contrary to the public benefit or inconsistent with its nonprofit
status and bylaws in effect at the time of the Contribution. Except
for the license granted herein to the Copyright Holders and recipients of
software distributed by the Copyright Holders, You reserve all right, title,
and interest in and to Your Contributions.
1. Definitions.
"You" (or "Your") shall mean the copyright owner or legal entity
authorized by the copyright owner that is making this Agreement
with the Copyright Holders. For legal entities, the entity making a
Contribution and all other entities that control, are controlled by,
or are under common control with that entity are considered to be a
single Contributor. For the purposes of this definition, "control"
means (i) the power, direct or indirect, to cause the direction or
management of such entity, whether by contract or otherwise, or
(ii) ownership of fifty percent (50%) or more of the outstanding
shares, or (iii) beneficial ownership of such entity.
"Contribution" shall mean the code, documentation or other original
works of authorship expressly identified in Schedule B, as well as
any original work of authorship, including
any modifications or additions to an existing work, that is intentionally
submitted by You to the Copyright Holders for inclusion in, or
documentation of, any of the products owned or managed by the
Copyright Holders (the "Work"). For the purposes of this definition,
"submitted" means any form of electronic, verbal, or written
communication sent to the Copyright Holders or its representatives,
including but not limited to communication on electronic mailing
lists, source code control systems, and issue tracking systems
that are managed by, or on behalf of, the Copyright Holders for the
purpose of discussing and improving the Work, but excluding
communication that is conspicuously marked or otherwise designated
in writing by You as "Not a Contribution."
2. Grant of Copyright License. Subject to the terms and conditions
of this Agreement, You hereby grant to the Copyright Holders and to
recipients of software distributed by the Copyright Holders a
perpetual, worldwide, non-exclusive, no-charge, royalty-free,
irrevocable copyright license to reproduce, prepare derivative works
of, publicly display, publicly perform, sublicense, and distribute
Your Contributions and such derivative works.
3. Grant of Patent License. Subject to the terms and conditions of
this Agreement, You hereby grant to the Copyright Holders and to
recipients of software distributed by the Copyright Holders
a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license
to make, have made, use, offer to sell, sell, import, and otherwise
transfer the Work, where such license applies only to those
patent claims licensable by You that are necessarily infringed
by Your Contribution(s) alone or by combination of Your Contribution(s)
with the Work to which such Contribution(s) were submitted.
If any entity institutes patent litigation against You or any
other entity (including a cross-claim or counterclaim in a lawsuit)
alleging that your Contribution, or the Work to which you have
contributed, constitutes direct or contributory patent infringement,
then any patent licenses granted to that entity under this Agreement
for that Contribution or Work shall terminate as of the date such
litigation is filed.
4. You represent that You are legally entitled to grant the above
license. You represent further that each employee of the
Corporation designated on Schedule A below (or in a subsequent
written modification to that Schedule) is authorized to submit
Contributions on behalf of the Corporation.
5. You represent that each of Your Contributions is Your original
creation (see section 7 for submissions on behalf of others).
6. You are not expected to provide support for Your Contributions,
except to the extent You desire to provide support. You may provide
support for free, for a fee, or not at all. Unless required by
applicable law or agreed to in writing, You provide Your
Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
OF ANY KIND, either express or implied, including, without
limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT,
MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE.
7. Should You wish to submit work that is not Your original creation,
You may submit it to the Copyright Holders separately from any
Contribution, identifying the complete details of its source and
of any license or other restriction (including, but not limited
to, related patents, trademarks, and license agreements) of which
you are personally aware, and conspicuously marking the work as
"Submitted on behalf of a third-party: [named here]".
8. It is your responsibility to notify the Copyright Holders when any change
is required to the list of designated employees authorized to submit
Contributions on behalf of the Corporation, or to the Corporation's
Point of Contact with the Copyright Holders.
Please sign: __________________________________ Date: _______________
Title: __________________________________
Corporation: __________________________________
Schedule A
[Initial list of designated employees. NB: authorization is not
tied to particular Contributions.]
Schedule B
[Identification of optional concurrent software grant. Would be
left blank or omitted if there is no concurrent software grant.]


@ -1,43 +0,0 @@
FROM golang:1.5
RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/7fa2af80.pub && \
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +2 > cudasign.pub && \
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64 /" > /etc/apt/sources.list.d/cuda.list
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-cudart-dev-6-5=6.5-19 \
cuda-misc-headers-6-5=6.5-19 && \
rm -rf /var/lib/apt/lists/*
RUN objcopy --redefine-sym memcpy=memcpy@GLIBC_2.2.5 /usr/local/cuda-6.5/lib64/libcudart_static.a
RUN NVIDIA_GDK_SUM=1e32e58f69fe29ee67b845233e7aa9347f37994463252bccbc8bfc8a7104ab5a && \
wget -O gdk.run -q http://developer.download.nvidia.com/compute/cuda/7.5/Prod/local_installers/cuda_352_39_gdk_linux.run && \
echo "$NVIDIA_GDK_SUM gdk.run" | sha256sum -c --strict - && \
chmod +x gdk.run && ./gdk.run --silent && rm gdk.run
VOLUME /go/bin
WORKDIR /go/src/github.com/NVIDIA/nvidia-docker/src
COPY src .
ENV CGO_CFLAGS "-I /usr/local/cuda-6.5/include -I /usr/include/nvidia/gdk"
ENV CGO_LDFLAGS "-L /usr/local/cuda-6.5/lib64"
RUN go get -v ./...
ARG USER_ID
RUN useradd --non-unique --uid $USER_ID nvidia
USER nvidia
ARG CR_NAME
ARG CR_EMAIL
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
ARG PKG_ARCH
ENV VERSION $PKG_VERS
CMD go install -v -ldflags="-s -X main.Version=$VERSION" ./...


@ -1,53 +0,0 @@
FROM ppc64le/golang:1.6.3
RUN echo "deb http://httpredir.debian.org/debian stretch main" >> /etc/apt/sources.list && \
echo "deb http://httpredir.debian.org/debian stretch-updates main" >> /etc/apt/sources.list && \
echo "deb http://security.debian.org stretch/updates main" >> /etc/apt/sources.list && \
apt-get clean && apt-get update && \
apt-get upgrade -y \
binutils \
libc6 \
libc6-dev \
libc-bin && \
rm -rf /var/lib/apt/lists/*
RUN NVIDIA_GPGKEY_SUM=bd841d59a27a406e513db7d405550894188a4c1cd96bf8aa4f82f1b39e0b5c1c && \
NVIDIA_GPGKEY_FPR=889bee522da690103c4b085ed88c3d385c37d3be && \
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/GPGKEY && \
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +2 > cudasign.pub && \
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/ppc64el /" > /etc/apt/sources.list.d/cuda.list
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-cudart-dev-7-5=7.5-23 \
cuda-misc-headers-7-5=7.5-23 && \
rm -rf /var/lib/apt/lists/*
RUN NVIDIA_GDK_SUM=064678e29d39f0c21f4b66c5e2fb18ba65fd9bc3372d0b319c31cab0e791fc1c && \
curl -fsSL -o gdk.run http://developer.download.nvidia.com/compute/cuda/7.5/Prod/gdk/gdk_linux_ppc64le_352_79_release.run && \
echo "$NVIDIA_GDK_SUM gdk.run" | sha256sum -c --strict - && \
chmod +x gdk.run && ./gdk.run --silent && rm gdk.run
VOLUME /go/bin
WORKDIR /go/src/github.com/NVIDIA/nvidia-docker/src
COPY src .
ENV CGO_CFLAGS "-I /usr/local/cuda-7.5/include -I /usr/include/nvidia/gdk"
ENV CGO_LDFLAGS "-L /usr/local/cuda-7.5/lib64"
RUN go get -v ./...
ARG USER_ID
RUN useradd --non-unique --uid $USER_ID nvidia
USER nvidia
ARG CR_NAME
ARG CR_EMAIL
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
ARG PKG_ARCH
ENV VERSION $PKG_VERS
CMD go install -v -ldflags="-s -X main.Version=$VERSION" ./...

Dockerfile.centos7 (new file, 36 lines)

@ -0,0 +1,36 @@
FROM centos:7
# packaging dependencies
RUN yum install -y \
rpm-build && \
rm -rf /var/cache/yum/*
# packaging
ARG PKG_VERS
ARG PKG_REV
ARG RUNTIME_VERSION
ARG DOCKER_VERSION
ENV VERSION $PKG_VERS
ENV RELEASE $PKG_REV
ENV RUNTIME_VERSION $RUNTIME_VERSION
ENV DOCKER_VERSION $DOCKER_VERSION
# output directory
ENV DIST_DIR=/tmp/nvidia-container-runtime-$PKG_VERS/SOURCES
RUN mkdir -p $DIST_DIR
COPY nvidia-docker $DIST_DIR
COPY daemon.json $DIST_DIR
WORKDIR $DIST_DIR/..
COPY rpm .
CMD rpmbuild --clean -bb \
-D "_topdir $PWD" \
-D "version $VERSION" \
-D "release $RELEASE" \
-D "runtime_version $RUNTIME_VERSION" \
-D "docker_version $DOCKER_VERSION" \
SPECS/nvidia-docker2.spec && \
mv RPMS/noarch/*.rpm /dist


@ -1,37 +0,0 @@
FROM ubuntu:14.04
RUN apt-get update && apt-get install -y --no-install-recommends \
vim-nox \
dh-make \
dh-systemd \
fakeroot \
build-essential \
devscripts && \
rm -rf /var/lib/apt/lists/*
ARG USER_ID
ARG CR_NAME
ARG CR_EMAIL
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
ARG PKG_ARCH
VOLUME /dist
VOLUME /build
WORKDIR /tmp/$PKG_NAME-$PKG_VERS
ENV DEBFULLNAME $CR_NAME
ENV DEBEMAIL $CR_EMAIL
ENV REVISION $PKG_VERS-$PKG_REV
ENV ARCHITECTURE $PKG_ARCH
RUN useradd --non-unique --uid $USER_ID nvidia && chown nvidia: .
USER nvidia
CMD tar -xf /dist/*.tar.xz && \
read -p "Update changelog (y/n)? " yn && [ "$yn" = "y" ] && \
dch -c /build/deb/changelog -v $REVISION --no-auto-nmu ; \
dh_make -y -s -c bsd -d -t /build/deb -f /dist/*.tar.xz && \
debuild --preserve-env --dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
mv /tmp/*.deb /dist


@ -1,39 +0,0 @@
FROM ppc64le/ubuntu:14.04
RUN apt-get update && apt-get install -y --no-install-recommends \
vim-nox \
dh-make \
dh-systemd \
fakeroot \
build-essential \
devscripts && \
rm -rf /var/lib/apt/lists/*
ARG USER_ID
ARG CR_NAME
ARG CR_EMAIL
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
# Unused but kept for consistency with x86
ARG PKG_ARCH
VOLUME /dist
VOLUME /build
WORKDIR /tmp/$PKG_NAME-$PKG_VERS
ENV DEBFULLNAME $CR_NAME
ENV DEBEMAIL $CR_EMAIL
ENV REVISION $PKG_VERS-$PKG_REV
# Hard-coded since ppc64le doesn't have the issue of amd64 vs x86_64.
ENV ARCHITECTURE ppc64el
RUN useradd --non-unique --uid $USER_ID nvidia && chown nvidia: .
USER nvidia
CMD tar -xf /dist/*.tar.xz && \
read -p "Update changelog (y/n)? " yn && [ "$yn" = "y" ] && \
dch -c /build/deb/changelog -v $REVISION --no-auto-nmu ; \
dh_make -y -s -c bsd -d -t /build/deb -f /dist/*.tar.xz && \
debuild --preserve-env --dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
mv /tmp/*.deb /dist


@ -1,47 +0,0 @@
FROM centos:7
RUN yum install -y \
vim \
rpm-build && \
rm -rf /var/cache/yum/*
RUN sed -i 's/include_release_info = 1/include_release_info = 0/' /usr/share/vim/vim74/ftplugin/spec.vim && \
echo 'let g:spec_chglog_format = "%a %b %d %Y ".$VENDOR." <".$EMAIL."> ".$VERSION."-".$REVISION' >> /etc/vimrc && \
echo 'autocmd VimEnter *.spec execute "normal \\c"' >> /etc/vimrc
ARG USER_ID
ARG CR_NAME
ARG CR_EMAIL
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
ARG PKG_ARCH
VOLUME /dist
VOLUME /build
WORKDIR /tmp/$PKG_NAME-$PKG_VERS
ENV VENDOR $CR_NAME
ENV EMAIL $CR_EMAIL
ENV NAME $PKG_NAME
ENV VERSION $PKG_VERS
ENV REVISION $PKG_REV
ENV ARCHITECTURE $PKG_ARCH
RUN useradd --non-unique --uid $USER_ID nvidia && chown nvidia: .
USER nvidia
CMD read -p "Update changelog (y/n)? " yn && [ "$yn" = "y" ] && \
vim /build/rpm/SPECS/$NAME.spec ; \
cp -Lr /build/rpm/* . && \
cp /dist/*.tar.xz SOURCES && \
rpmbuild --clean -bb \
-D "_topdir $PWD" \
-D "vendor $VENDOR" \
-D "email $EMAIL" \
-D "name $NAME" \
-D "version $VERSION" \
-D "revision $REVISION" \
-D "architecture $ARCHITECTURE" \
SPECS/$NAME.spec && \
mv RPMS/$ARCHITECTURE/*.rpm /dist


@ -1,47 +0,0 @@
FROM ibmcom/centos-ppc64le:7
RUN yum install -y \
vim \
rpm-build && \
rm -rf /var/cache/yum/*
RUN sed -i 's/include_release_info = 1/include_release_info = 0/' /usr/share/vim/vim74/ftplugin/spec.vim && \
echo 'let g:spec_chglog_format = "%a %b %d %Y ".$VENDOR." <".$EMAIL."> ".$VERSION."-".$REVISION' >> /etc/vimrc && \
echo 'autocmd VimEnter *.spec execute "normal \\c"' >> /etc/vimrc
ARG USER_ID
ARG CR_NAME
ARG CR_EMAIL
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
ARG PKG_ARCH
VOLUME /dist
VOLUME /build
WORKDIR /tmp/$PKG_NAME-$PKG_VERS
ENV VENDOR $CR_NAME
ENV EMAIL $CR_EMAIL
ENV NAME $PKG_NAME
ENV VERSION $PKG_VERS
ENV REVISION $PKG_REV
ENV ARCHITECTURE $PKG_ARCH
RUN useradd --non-unique --uid $USER_ID nvidia && chown nvidia: .
USER nvidia
CMD read -p "Update changelog (y/n)? " yn && [ "$yn" = "y" ] && \
vim /build/rpm/SPECS/$NAME.spec ; \
cp -Lr /build/rpm/* . && \
cp /dist/*.tar.xz SOURCES && \
rpmbuild --clean -bb \
-D "_topdir $PWD" \
-D "vendor $VENDOR" \
-D "email $EMAIL" \
-D "name $NAME" \
-D "version $VERSION" \
-D "revision $REVISION" \
-D "architecture $ARCHITECTURE" \
SPECS/$NAME.spec && \
mv RPMS/$ARCHITECTURE/*.rpm /dist

Dockerfile.xenial (new file, 40 lines)

@ -0,0 +1,40 @@
FROM ubuntu:xenial
# packaging dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
dh-make \
fakeroot \
build-essential \
devscripts && \
rm -rf /var/lib/apt/lists/*
# packaging
ARG PKG_VERS
ARG PKG_REV
ARG RUNTIME_VERSION
ARG DOCKER_VERSION
ENV DEBFULLNAME "NVIDIA CORPORATION"
ENV DEBEMAIL "cudatools@nvidia.com"
ENV REVISION "$PKG_VERS-$PKG_REV"
ENV RUNTIME_VERSION $RUNTIME_VERSION
ENV DOCKER_VERSION $DOCKER_VERSION
ENV DISTRIB "UNRELEASED"
ENV SECTION ""
# output directory
ENV DIST_DIR=/tmp/nvidia-docker2-$PKG_VERS
RUN mkdir -p $DIST_DIR
# nvidia-docker 2.0
COPY nvidia-docker $DIST_DIR/nvidia-docker
COPY daemon.json $DIST_DIR/daemon.json
WORKDIR $DIST_DIR
COPY debian ./debian
RUN dch --create --package nvidia-docker2 -v "$REVISION" "v$REVISION" -D "$DISTRIB" && \
dch -r ""
CMD debuild --preserve-env --dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
mv /tmp/*.deb /dist


@ -1,4 +1,4 @@
Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions

Makefile (145 lines changed)

@ -1,87 +1,90 @@
# Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
NV_DOCKER ?= docker
DOCKER ?= docker
prefix ?= /usr/local
exec_prefix ?= $(prefix)
bindir ?= $(exec_prefix)/bin
VERSION := 2.0.1
PKG_REV := 1
RUNTIME_VERSION := 1.1.0
CR_NAME := NVIDIA CORPORATION
CR_EMAIL := digits@nvidia.com
PKG_NAME := nvidia-docker
PKG_VERS := 1.0.1
PKG_REV := 1
ifneq ($(MAKECMDGOALS),rpm)
PKG_ARCH := amd64
else
PKG_ARCH := x86_64
endif
DIST_DIR := $(CURDIR)/dist
# Mirror the BUILD_ARCH from the build Dockerfile
BUILD_ARCH = .$(shell uname -m)
ifneq ($(BUILD_ARCH),.ppc64le)
BUILD_ARCH =
else
PKG_ARCH = ppc64le
endif
.NOTPARALLEL:
.PHONY: all
BIN_DIR := $(CURDIR)/bin
DIST_DIR := $(CURDIR)/dist
BUILD_DIR := $(CURDIR)/build
DOCKER_BIN := $(BIN_DIR)/nvidia-docker
PLUGIN_BIN := $(BIN_DIR)/nvidia-docker-plugin
all: xenial centos7
DOCKER_VERS := $(shell $(NV_DOCKER) version -f '{{.Client.Version}}')
DOCKER_VERS_MAJ := $(shell echo $(DOCKER_VERS) | cut -d. -f1)
DOCKER_VERS_MIN := $(shell echo $(DOCKER_VERS) | cut -d. -f2)
xenial: 17.09.0-xenial 17.06.2-xenial 17.03.2-xenial 1.13.1-xenial 1.12.6-xenial
DOCKER_RMI := $(NV_DOCKER) rmi
DOCKER_RUN := $(NV_DOCKER) run --rm --net=host
DOCKER_IMAGES := $(NV_DOCKER) images -q $(PKG_NAME)
DOCKER_BUILD := $(NV_DOCKER) build --build-arg USER_ID="$(shell id -u)" \
--build-arg CR_NAME="$(CR_NAME)" \
--build-arg CR_EMAIL="$(CR_EMAIL)" \
--build-arg PKG_NAME="$(PKG_NAME)" \
--build-arg PKG_VERS="$(PKG_VERS)" \
--build-arg PKG_REV="$(PKG_REV)" \
--build-arg PKG_ARCH="$(PKG_ARCH)"
centos7: 17.09.0.ce-centos7 17.06.2.ce-centos7 17.03.2.ce-centos7 1.12.6-centos7
.PHONY: all build install uninstall clean distclean tarball deb rpm
17.09.0-xenial:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker17.09.0-1" \
--build-arg DOCKER_VERSION="docker-ce (= 17.09.0~ce-0~ubuntu) | docker-ee (= 17.09.0~ee-0~ubuntu)" \
--build-arg PKG_VERS="$(VERSION)+docker17.09.0" \
--build-arg PKG_REV="$(PKG_REV)" \
-t nvidia-docker2:$@ -f Dockerfile.xenial .
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
all: build
17.06.2-xenial:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker17.06.2-1" \
--build-arg DOCKER_VERSION="docker-ce (= 17.06.2~ce-0~ubuntu) | docker-ee (= 17.06.2~ee-0~ubuntu)" \
--build-arg PKG_VERS="$(VERSION)+docker17.06.2" \
--build-arg PKG_REV="$(PKG_REV)" \
-t nvidia-docker2:$@ -f Dockerfile.xenial .
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
build: distclean
@mkdir -p $(BIN_DIR)
@$(DOCKER_BUILD) -t $(PKG_NAME):$@ -f Dockerfile.$@$(BUILD_ARCH) $(CURDIR)
@$(DOCKER_RUN) -v $(BIN_DIR):/go/bin:Z $(PKG_NAME):$@
17.03.2-xenial:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker17.03.2-1" \
--build-arg DOCKER_VERSION="docker-ce (= 17.03.2~ce-0~ubuntu-xenial) | docker-ee (= 17.03.2~ee-0~ubuntu-xenial)" \
--build-arg PKG_VERS="$(VERSION)+docker17.03.2" \
--build-arg PKG_REV="$(PKG_REV)" \
-t nvidia-docker2:$@ -f Dockerfile.xenial .
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
install: build
install -D -m 755 -t $(bindir) $(DOCKER_BIN)
install -D -m 755 -t $(bindir) $(PLUGIN_BIN)
1.13.1-xenial:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker1.13.1-1" \
--build-arg DOCKER_VERSION="docker-engine(= 1.13.1-0~ubuntu-xenial)" \
--build-arg PKG_VERS="$(VERSION)+docker1.13.1" \
--build-arg PKG_REV="$(PKG_REV)" \
-t nvidia-docker2:$@ -f Dockerfile.xenial .
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
uninstall:
$(RM) $(bindir)/$(notdir $(DOCKER_BIN))
$(RM) $(bindir)/$(notdir $(PLUGIN_BIN))
1.12.6-xenial:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker1.12.6-1" \
--build-arg DOCKER_VERSION="docker-engine (= 1.12.6-0~ubuntu-xenial) | docker.io (= 1.12.6-0ubuntu1~16.04.1)" \
--build-arg PKG_VERS="$(VERSION)+docker1.12.6" \
--build-arg PKG_REV="$(PKG_REV)" \
-t nvidia-docker2:$@ -f Dockerfile.xenial .
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
clean:
-@$(DOCKER_IMAGES) | xargs $(DOCKER_RMI) 2> /dev/null
-@$(DOCKER_RMI) golang:1.5 ubuntu:14.04 centos:7 2> /dev/null
17.09.0.ce-centos7:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)-1.docker17.09.0" \
--build-arg DOCKER_VERSION="docker-ce = 17.09.0.ce" \
--build-arg PKG_VERS="$(VERSION)" \
--build-arg PKG_REV="$(PKG_REV).docker17.09.0.ce" \
-t nvidia-docker2:$@ -f Dockerfile.centos7 .
$(DOCKER) run --rm -v $(DIST_DIR)/centos7:/dist:Z nvidia-docker2:$@
distclean:
@rm -rf $(BIN_DIR)
@rm -rf $(DIST_DIR)
17.06.2.ce-centos7:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)-1.docker17.06.2" \
--build-arg DOCKER_VERSION="docker-ce = 17.06.2.ce" \
--build-arg PKG_VERS="$(VERSION)" \
--build-arg PKG_REV="$(PKG_REV).docker17.06.2.ce" \
-t nvidia-docker2:$@ -f Dockerfile.centos7 .
$(DOCKER) run --rm -v $(DIST_DIR)/centos7:/dist:Z nvidia-docker2:$@
tarball: build
@mkdir -p $(DIST_DIR)
tar --transform='s;.*/;$(PKG_NAME)/;' -caf $(DIST_DIR)/$(PKG_NAME)_$(PKG_VERS)_$(PKG_ARCH).tar.xz $(BIN_DIR)/*
@printf "\nFind tarball at $(DIST_DIR)\n\n"
17.03.2.ce-centos7:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)-1.docker17.03.2" \
--build-arg DOCKER_VERSION="docker-ce = 17.03.2.ce" \
--build-arg PKG_VERS="$(VERSION)" \
--build-arg PKG_REV="$(PKG_REV).docker17.03.2.ce" \
-t nvidia-docker2:$@ -f Dockerfile.centos7 .
$(DOCKER) run --rm -v $(DIST_DIR)/centos7:/dist:Z nvidia-docker2:$@
deb: tarball
@$(DOCKER_BUILD) -t $(PKG_NAME):$@ -f Dockerfile.$@$(BUILD_ARCH) $(CURDIR)
@$(DOCKER_RUN) -ti -v $(DIST_DIR):/dist:Z -v $(BUILD_DIR):/build:Z $(PKG_NAME):$@
@printf "\nFind packages at $(DIST_DIR)\n\n"
rpm: tarball
@$(DOCKER_BUILD) -t $(PKG_NAME):$@ -f Dockerfile.$@$(BUILD_ARCH) $(CURDIR)
@$(DOCKER_RUN) -ti -v $(DIST_DIR):/dist:Z -v $(BUILD_DIR):/build:Z $(PKG_NAME):$@
@printf "\nFind packages at $(DIST_DIR)\n\n"
1.12.6-centos7:
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)-1.docker1.12.6" \
--build-arg DOCKER_VERSION="docker = 2:1.12.6" \
--build-arg PKG_VERS="$(VERSION)" \
--build-arg PKG_REV="$(PKG_REV).docker1.12.6" \
-t nvidia-docker2:$@ -f Dockerfile.centos7 .
$(DOCKER) run --rm -v $(DIST_DIR)/centos7:/dist:Z nvidia-docker2:$@

README.md (114 lines changed)

@ -1,61 +1,103 @@
# Docker Engine Utility for NVIDIA GPUs
**We are beginning the transition towards [nvidia-docker 2.0](https://github.com/NVIDIA/nvidia-docker/tree/2.0); please help us test it.**
[![GitHub license](https://img.shields.io/badge/license-New%20BSD-blue.svg?style=flat-square)](https://raw.githubusercontent.com/NVIDIA/nvidia-docker/master/LICENSE)
[![Package repository](https://img.shields.io/badge/packages-repository-b956e8.svg?style=flat-square)](https://nvidia.github.io/nvidia-docker)
![nvidia-gpu-docker](https://cloud.githubusercontent.com/assets/3028125/12213714/5b208976-b632-11e5-8406-38d379ec46aa.png)
# Documentation
**Warning: This project is based on an alpha release (libnvidia-container). It is already more stable than 1.0 but we need help testing it.**
The full documentation is available on the [repository wiki](https://github.com/NVIDIA/nvidia-docker/wiki).
A good place to start is to understand [why nvidia-docker](https://github.com/NVIDIA/nvidia-docker/wiki/Motivation) is needed in the first place.
## Differences with 1.0
* Doesn't require wrapping the Docker CLI and doesn't need a separate daemon,
* GPU isolation is now achieved with environment variable `NVIDIA_VISIBLE_DEVICES`,
* Can enable GPU support for any Docker image. Not just the ones based on our official CUDA images,
* Package repositories are available for Ubuntu and CentOS,
* Uses a new implementation based on [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
## Removing nvidia-docker 1.0
# Quick start
Version 1.0 of the nvidia-docker package must be cleanly removed before continuing.
You must stop and remove **all** containers started with nvidia-docker 1.0.
Assuming the NVIDIA drivers and Docker® Engine are properly installed (see [installation](https://github.com/NVIDIA/nvidia-docker/wiki/Installation))
#### _Ubuntu distributions_
#### Ubuntu distributions
```sh
# Install nvidia-docker and nvidia-docker-plugin
wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.1/nvidia-docker_1.0.1-1_amd64.deb
sudo dpkg -i /tmp/nvidia-docker*.deb && rm /tmp/nvidia-docker*.deb
# Test nvidia-smi
nvidia-docker run --rm nvidia/cuda nvidia-smi
docker volume ls -q -f driver=nvidia-docker | xargs -r -I{} -n1 docker ps -q -a -f volume={} | xargs -r docker rm -f
sudo apt-get purge nvidia-docker
```
#### _CentOS distributions_
```sh
# Install nvidia-docker and nvidia-docker-plugin
wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.1/nvidia-docker-1.0.1-1.x86_64.rpm
sudo rpm -i /tmp/nvidia-docker*.rpm && rm /tmp/nvidia-docker*.rpm
sudo systemctl start nvidia-docker
#### CentOS distributions
# Test nvidia-smi
nvidia-docker run --rm nvidia/cuda nvidia-smi
```
docker volume ls -q -f driver=nvidia-docker | xargs -r -I{} -n1 docker ps -q -a -f volume={} | xargs -r docker rm -f
sudo yum remove nvidia-docker
```
#### _Other distributions_
```sh
# Install nvidia-docker and nvidia-docker-plugin
wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.1/nvidia-docker_1.0.1_amd64.tar.xz
sudo tar --strip-components=1 -C /usr/bin -xvf /tmp/nvidia-docker*.tar.xz && rm /tmp/nvidia-docker*.tar.xz
## Installation
# Run nvidia-docker-plugin
sudo -b nohup nvidia-docker-plugin > /tmp/nvidia-docker.log
**If you have a custom `/etc/docker/daemon.json`, the `nvidia-docker2` package will override it.**
# Test nvidia-smi
nvidia-docker run --rm nvidia/cuda nvidia-smi
#### Ubuntu distributions
1. Install the repository for your distribution by following the instructions [here](http://nvidia.github.io/nvidia-docker/).
2. Install the `nvidia-docker2` package and restart the Docker daemon:
```
sudo apt-get install nvidia-docker2
sudo pkill -SIGHUP dockerd
```
#### _ppc64le (POWER) Architecture_
There is limited build support for ppc64le. Running `make deb` will build the nvidia-docker deb for ppc64le (if run on a ppc64le system). If the deb install fails because you have the 'docker.io' (>= v1.9) package installed, but not the 'docker-engine' package, you can force-install. There is currently no docker-provided docker-engine repository for ppc64le.
#### CentOS distributions
1. Install the repository for your distribution by following the instructions [here](http://nvidia.github.io/nvidia-docker/).
2. Install the `nvidia-docker2` package and restart the Docker daemon:
```
sudo yum install nvidia-docker2
sudo pkill -SIGHUP dockerd
```
Not all build targets have been implemented for ppc64le. If you would like a Dockerfile created to enable a ppc64le target, please open an issue.
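A rough sketch of the build and force-install path described above (a hypothetical session; the `dist/` path and package name are assumed from the Makefile's output layout):
```
# On a ppc64le host
make deb
# Force past the docker-engine dependency when only docker.io (>= 1.9) is installed
sudo dpkg -i --force-depends dist/nvidia-docker*ppc64el.deb
```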
## Usage
# Issues and Contributing
#### NVIDIA runtime
nvidia-docker registers a new container runtime with the Docker daemon.
You must select the `nvidia` runtime when using `docker run`:
```
docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
```
**A signed copy of the [Contributor License Agreement](https://raw.githubusercontent.com/NVIDIA/nvidia-docker/master/CLA) needs to be provided to digits@nvidia.com before any change can be accepted.**
#### GPU isolation
Set the environment variable `NVIDIA_VISIBLE_DEVICES` in the container:
```
docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=0 --rm nvidia/cuda nvidia-smi
```
#### Non-CUDA image
Setting `NVIDIA_VISIBLE_DEVICES` will enable GPU support for any container image:
```
docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --rm debian:stretch nvidia-smi
```
## Advanced
#### Backward compatibility
To ease the transition from 1.0 to 2.0, a bash script is provided at `/usr/bin/nvidia-docker` for backward compatibility.
It will automatically inject the `--runtime=nvidia` argument and convert `NV_GPU` to `NVIDIA_VISIBLE_DEVICES`.
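For example (a hypothetical session using the 1.0-style `NV_GPU` syntax):
```
# 1.0-style invocation handled by the compatibility wrapper
NV_GPU=0,1 nvidia-docker run --rm nvidia/cuda nvidia-smi
# is roughly equivalent to
docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=0,1 --rm nvidia/cuda nvidia-smi
```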
#### Existing `daemon.json`
If you have a custom `/etc/docker/daemon.json`, the `nvidia-docker2` package will override it.
In this case, it is recommended to install [nvidia-container-runtime](https://github.com/nvidia/nvidia-container-runtime#installation) instead and register the new runtime manually.
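A minimal sketch of the manual route, reusing the runtime entry shipped in this package's `daemon.json` (this overwrites the file, so merge it by hand if you already have other daemon settings):
```
sudo tee /etc/docker/daemon.json <<'EOF'
{
    "runtimes": {
        "nvidia": {
            "path": "/usr/bin/nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
EOF
sudo pkill -SIGHUP dockerd
```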
#### Default runtime
The default runtime used by the Docker® Engine is [runc](https://github.com/opencontainers/runc); our runtime can be made the default by configuring the Docker daemon with `--default-runtime=nvidia`.
Doing so will remove the need to add the `--runtime=nvidia` argument to `docker run`.
It is also the only way to have GPU access during `docker build`.
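One way to do this is through `daemon.json` rather than the `dockerd` command line; a sketch, assuming there are no other daemon settings to preserve:
```
sudo tee /etc/docker/daemon.json <<'EOF'
{
    "default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "/usr/bin/nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
EOF
sudo pkill -SIGHUP dockerd
# --runtime=nvidia is no longer needed
docker run --rm nvidia/cuda nvidia-smi
```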
#### Environment variables
The behavior of the runtime can be modified through environment variables (such as `NVIDIA_VISIBLE_DEVICES`).
Those environment variables are consumed by [nvidia-container-runtime](https://github.com/nvidia/nvidia-container-runtime) and are documented [here](https://github.com/nvidia/nvidia-container-runtime#environment-variables-oci-spec).
Our official CUDA images use default values for these variables.
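For instance, specific GPUs can be selected with a comma-separated list (assuming a machine with at least two GPUs):
```
docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=0,1 --rm nvidia/cuda nvidia-smi
```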
## Issues and Contributing
A signed copy of the [Contributor License Agreement](https://raw.githubusercontent.com/NVIDIA/nvidia-docker/master/CLA) needs to be provided to <a href="mailto:digits@nvidia.com">digits@nvidia.com</a> before any change can be accepted.
* Please let us know by [filing a new issue](https://github.com/NVIDIA/nvidia-docker/issues/new)
* You can contribute by opening a [pull request](https://help.github.com/articles/using-pull-requests/)


@ -1,24 +0,0 @@
[Unit]
Description=NVIDIA Docker plugin
Documentation=https://github.com/NVIDIA/nvidia-docker/wiki
After=local-fs.target network.target
Wants=docker.service
[Service]
Environment="SOCK_DIR=/var/lib/nvidia-docker"
Environment="SPEC_FILE=/etc/docker/plugins/nvidia-docker.spec"
User=nvidia-docker
PermissionsStartOnly=true
Restart=on-failure
RestartSec=1
TimeoutStartSec=0
TimeoutStopSec=20
ExecStart=/usr/bin/nvidia-docker-plugin -s $SOCK_DIR
ExecStartPost=/bin/sh -c '/bin/mkdir -p $( dirname $SPEC_FILE )'
ExecStartPost=/bin/sh -c '/bin/echo unix://$SOCK_DIR/nvidia-docker.sock > $SPEC_FILE'
ExecStopPost=/bin/rm -f $SPEC_FILE
[Install]
WantedBy=multi-user.target


@ -1,78 +0,0 @@
nvidia-docker (1.0.1-1) trusty; urgency=low
* Support for Docker 17.03 including EE and CE (Closes: #323, #324)
* Load UVM unconditionally
* Fix Docker argument parsing (Closes: #295)
* Fix images pull output (Closes: #310)
-- NVIDIA CORPORATION <digits@nvidia.com> Fri, 03 Mar 2017 00:59:14 +0000
nvidia-docker (1.0.0-1) trusty; urgency=low
* Support for Docker 1.13
* Fix CPU affinity reporting on systems where NUMA is disabled (Closes: #198)
* Fix premature EOF in the remote API responses (Closes: #123)
* Add support for the VolumeDriver.Capabilities plugin endpoint
* Enable ppc64le library lookup (Closes: #194)
* Fix parsing of DOCKER_HOST for unix domain sockets (Closes: #119)
-- NVIDIA CORPORATION <digits@nvidia.com> Wed, 18 Jan 2017 21:44:42 +0000
nvidia-docker (1.0.0~rc.3-1) trusty; urgency=low
* Support for Docker 1.12
* Add volume mount options support to the nvidia package
* Export the nvidia-uvm-tools device
* Provide the libcuda.so symlink as part of the driver volume (Closes: #103)
* Use relative symlinks inside the volumes
* Disable CUDA unified memory
-- NVIDIA CORPORATION <digits@nvidia.com> Fri, 17 Jun 2016 22:08:11 +0000
nvidia-docker (1.0.0~rc.2-1) trusty; urgency=low
* Allow UUIDs to be used in NV_GPU and docker/cli RestAPI endpoint
* Change the plugin usage with version information (Closes: #90)
* Remove the volume setup command (Closes: #96)
* Add support for the Pascal architecture
-- NVIDIA CORPORATION <digits@nvidia.com> Sat, 28 May 2016 00:18:44 +0000
nvidia-docker (1.0.0~rc-1) trusty; urgency=low
* Add /docker/cli/json RestAPI endpoint (Closes: #39, #91)
* Fix support for Docker 1.9 (Closes: #83)
* Handle gracefully devices unsupported by NVML (Closes: #40)
* Improve error reporting
* Support for Docker 1.11 (Closes: #89, #84, #77, #73)
* Add NVIDIA Docker version output
* Improve init scripts and add support for systemd
* Query CPU affinity through sysfs instead of NVML (Closes: #65)
* Load UVM before anything else
-- NVIDIA CORPORATION <digits@nvidia.com> Tue, 03 May 2016 17:44:36 -0700
nvidia-docker (1.0.0~beta.3-1) trusty; urgency=low
* Remove driver hard dependency (NVML)
* Improve error handling and REST API output
* Support for 364 drivers
* Preventive removal of the plugin socket
-- NVIDIA CORPORATION <digits@nvidia.com> Mon, 28 Mar 2016 16:48:51 -0700
nvidia-docker (1.0.0~beta.2-1) trusty; urgency=low
* Support for Docker 1.10 (Closes: #46)
* Support for Docker plugin API v1.2
* Support for 361 drivers
* Add copy strategy for cross-device volumes (Closes: #47)
-- NVIDIA CORPORATION <digits@nvidia.com> Mon, 07 Mar 2016 11:41:21 -0800
nvidia-docker (1.0.0~beta-1) trusty; urgency=low
* Initial release (Closes: #33)
-- NVIDIA CORPORATION <digits@nvidia.com> Mon, 08 Feb 2016 11:17:52 -0800


@ -1,16 +0,0 @@
Source: #PACKAGE#
Section: devel
Priority: optional
Maintainer: #USERNAME# <#EMAIL#>
Build-Depends: #BUILD_DEPS#, dh-systemd
Standards-Version: #POLICY#
Homepage: https://github.com/NVIDIA/nvidia-docker/wiki
Vcs-Git: https://github.com/NVIDIA/nvidia-docker
Vcs-Browser: https://github.com/NVIDIA/nvidia-docker
Package: #PACKAGE#
Architecture: #ARCHITECTURE#
Depends: ${misc:Depends}, ${shlibs:Depends}, adduser, docker-engine (>= 1.9.0) | docker-ce | docker-ee, libcap2-bin
Description: NVIDIA Docker container tools
NVIDIA Docker provides utilities to extend the Docker CLI allowing users
to build and run GPU applications as lightweight containers.


@ -1,36 +0,0 @@
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: #PACKAGE#
Source: https://github.com/NVIDIA/nvidia-docker
Files: *
Copyright: #YEAR# #USERNAME# <#EMAIL#>
License: BSD-3-Clause
Files: debian/*
Copyright: #YEAR# #USERNAME# <#EMAIL#>
License: BSD-3-Clause
License: BSD-3-Clause
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of #USERNAME# nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@ -1,2 +0,0 @@
# NVIDIA Docker plugin daemon options
NVIDIA_DOCKER_PLUGIN_OPTS="-s /var/lib/nvidia-docker"


@ -1 +0,0 @@
/var/lib/nvidia-docker


@ -1 +0,0 @@
#PACKAGE#/* /usr/bin


@ -1,3 +0,0 @@
improbable-bug-number-in-closes
hardening-no-relro
binary-without-manpage


@ -1,32 +0,0 @@
#!/bin/sh
set -e
NVIDIA_DOCKER_USER=#PACKAGE#
NVIDIA_DOCKER_ROOT=/var/lib/nvidia-docker
NVIDIA_DOCKER_PLUGIN=/usr/bin/nvidia-docker-plugin
case "$1" in
configure)
if [ -z "$2" ]; then
echo "Configuring user"
id -u "$NVIDIA_DOCKER_USER" >/dev/null 2>&1 || \
useradd -r -M -d "$NVIDIA_DOCKER_ROOT" -s /usr/sbin/nologin -c "NVIDIA Docker plugin" "$NVIDIA_DOCKER_USER"
fi
echo "Setting up permissions"
chown "$NVIDIA_DOCKER_USER": "$NVIDIA_DOCKER_ROOT"
setcap cap_fowner+pe "$NVIDIA_DOCKER_PLUGIN"
;;
abort-upgrade|abort-remove|abort-deconfigure)
;;
*)
echo "postinst called with unknown argument \`$1'" >&2
exit 1
;;
esac
#DEBHELPER#
exit 0


@ -1,24 +0,0 @@
#!/bin/sh
set -e
NVIDIA_DOCKER_USER=#PACKAGE#
case "$1" in
purge)
id -u "$NVIDIA_DOCKER_USER" >/dev/null 2>&1 && \
userdel "$NVIDIA_DOCKER_USER"
;;
upgrade|failed-upgrade|remove|abort-install|abort-upgrade|disappear)
;;
*)
echo "postrm called with unknown argument \`$1'" >&2
exit 1
;;
esac
#DEBHELPER#
exit 0


@ -1,27 +0,0 @@
#!/bin/sh
set -e
NVIDIA_DOCKER_DRIVER=#PACKAGE#
NVIDIA_DOCKER_ROOT=/var/lib/nvidia-docker
case "$1" in
remove)
echo "Purging NVIDIA volumes"
docker volume ls | awk -v drv="$NVIDIA_DOCKER_DRIVER" '{if ($1 == drv) print $2}' | xargs -r docker volume rm ||
echo "Failed to remove NVIDIA volumes, ignoring"
find "$NVIDIA_DOCKER_ROOT" ! -wholename "$NVIDIA_DOCKER_ROOT" -type d -empty -delete || true
;;
upgrade|deconfigure|failed-upgrade)
;;
*)
echo "prerm called with unknown argument \`$1'" >&2
exit 1
;;
esac
#DEBHELPER#
exit 0


@ -1 +0,0 @@
../common/nvidia-docker.service


@ -1,42 +0,0 @@
description "NVIDIA Docker plugin"
start on (local-filesystems and net-device-up)
stop on runlevel [!2345]
normal exit 0 KILL TERM
respawn
respawn limit 5 10
kill timeout 20
env NVIDIA_DOCKER_USER=#PACKAGE#
env NVIDIA_DOCKER_PLUGIN=/usr/bin/nvidia-docker-plugin
env NVIDIA_DOCKER_PLUGIN_SPEC=/etc/docker/plugins/nvidia-docker.spec
script
if [ -f /etc/default/$UPSTART_JOB ]; then
. /etc/default/$UPSTART_JOB
fi
OPTS="$NVIDIA_DOCKER_PLUGIN_OPTS"
exec start-stop-daemon -S -u "$NVIDIA_DOCKER_USER" -c "$NVIDIA_DOCKER_USER" \
-a "$NVIDIA_DOCKER_PLUGIN" -- $OPTS
end script
post-start script
if [ -f /etc/default/$UPSTART_JOB ]; then
. /etc/default/$UPSTART_JOB
fi
OPTS="$NVIDIA_DOCKER_PLUGIN_OPTS"
SOCK_DIR=$( echo $OPTS | grep -oP -- '-s\s+\K\S+' )
SOCK_FILE=unix://$SOCK_DIR/nvidia-docker.sock
if [ -n "$SOCK_DIR" ]; then
mkdir -p $( dirname "$NVIDIA_DOCKER_PLUGIN_SPEC" )
echo "$SOCK_FILE" > "$NVIDIA_DOCKER_PLUGIN_SPEC"
fi
end script
post-stop script
rm -f "$NVIDIA_DOCKER_PLUGIN_SPEC"
end script


@ -1,5 +0,0 @@
#! /bin/sh
set -e
sed -i "s/#ARCHITECTURE#/${ARCHITECTURE}/" debian/control


@ -1,10 +0,0 @@
#!/usr/bin/make -f
# -*- makefile -*-
#export DH_VERBOSE=1
override_dh_shlibdeps:
dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info
%:
dh $@ --with=systemd


@ -1 +0,0 @@
../../common/nvidia-docker.service


@ -1,128 +0,0 @@
Name: %{name}
Version: %{version}
Release: %{revision}
BuildArch: %{architecture}
Group: Development Tools
Vendor: %{vendor}
Packager: %{vendor} <%{email}>
Summary: NVIDIA Docker container tools
URL: https://github.com/NVIDIA/nvidia-docker
License: BSD
Source0: %{name}_%{version}_%{architecture}.tar.xz
Source1: %{name}.service
Source2: LICENSE
%{?systemd_requires}
BuildRequires: systemd
Requires: libcap
%define nvidia_docker_user %{name}
%define nvidia_docker_driver %{name}
%define nvidia_docker_root /var/lib/nvidia-docker
%description
NVIDIA Docker provides utilities to extend the Docker CLI allowing users
to build and run GPU applications as lightweight containers.
%prep
%autosetup -n %{name}
cp %{SOURCE1} %{SOURCE2} .
%install
mkdir -p %{buildroot}%{_bindir}
mkdir -p %{buildroot}%{_unitdir}
mkdir -p %{buildroot}%{nvidia_docker_root}
install -m 755 -t %{buildroot}%{_bindir} nvidia-docker
install -m 755 -t %{buildroot}%{_bindir} nvidia-docker-plugin
install -m 644 -t %{buildroot}%{_unitdir} %{name}.service
%files
%license LICENSE
%dir %{nvidia_docker_root}
%{_bindir}/*
%{_unitdir}/*
%post
if [ $1 -eq 1 ]; then
echo "Configuring user"
id -u %{nvidia_docker_user} >/dev/null 2>&1 || \
useradd -r -M -d %{nvidia_docker_root} -s /usr/sbin/nologin -c "NVIDIA Docker plugin" %{nvidia_docker_user}
fi
echo "Setting up permissions"
chown %{nvidia_docker_user}: %{nvidia_docker_root}
setcap cap_fowner+pe %{_bindir}/nvidia-docker-plugin
%systemd_post %{name}
%preun
if [ $1 -eq 0 ]; then
echo "Purging NVIDIA volumes"
docker volume ls | awk -v drv=%{nvidia_docker_driver} '{if ($1 == drv) print $2}' | xargs -r docker volume rm ||
echo "Failed to remove NVIDIA volumes, ignoring"
find %{nvidia_docker_root} ! -wholename %{nvidia_docker_root} -type d -empty -delete
fi
%systemd_preun %{name}
%postun
if [ $1 -eq 0 ]; then
id -u %{nvidia_docker_user} >/dev/null 2>&1 && \
userdel %{nvidia_docker_user}
fi
%systemd_postun_with_restart %{name}
%changelog
* Fri Mar 03 2017 NVIDIA CORPORATION <digits@nvidia.com> 1.0.1-1
- Support for Docker 17.03 including EE and CE (Closes: #323, #324)
- Load UVM unconditionally
- Fix Docker argument parsing (Closes: #295)
- Fix images pull output (Closes: #310)
* Wed Jan 18 2017 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0-1
- Support for Docker 1.13
- Fix CPU affinity reporting on systems where NUMA is disabled (Closes: #198)
- Fix premature EOF in the remote API responses (Closes: #123)
- Add support for the VolumeDriver.Capabilities plugin endpoint
- Enable ppc64le library lookup (Closes: #194)
- Fix parsing of DOCKER_HOST for unix domain sockets (Closes: #119)
* Fri Jun 17 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~rc.3-1
- Support for Docker 1.12
- Add volume mount options support to the nvidia package
- Export the nvidia-uvm-tools device
- Provide the libcuda.so symlink as part of the driver volume (Closes: #103)
- Use relative symlinks inside the volumes
- Disable CUDA unified memory
* Sat May 28 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~rc.2-1
- Allow UUIDs to be used in NV_GPU and docker/cli RestAPI endpoint
- Change the plugin usage with version information (Closes: #90)
- Remove the volume setup command (Closes: #96)
- Add support for the Pascal architecture
* Tue May 03 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~rc-1
- Add /docker/cli/json RestAPI endpoint (Closes: #39, #91)
- Fix support for Docker 1.9 (Closes: #83)
- Handle gracefully devices unsupported by NVML (Closes: #40)
- Improve error reporting
- Support for Docker 1.11 (Closes: #89, #84, #77, #73)
- Add NVIDIA Docker version output
- Improve init scripts and add support for systemd
- Query CPU affinity through sysfs instead of NVML (Closes: #65)
- Load UVM before anything else
* Mon Mar 28 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~beta.3-1
- Remove driver hard dependency (NVML)
- Improve error handling and REST API output
- Support for 364 drivers
- Preventive removal of the plugin socket
* Mon Mar 07 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~beta.2-1
- Support for Docker 1.10 (Closes: #46)
- Support for Docker plugin API v1.2
- Support for 361 drivers
- Add copy strategy for cross-device volumes (Closes: #47)
* Mon Feb 08 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~beta-1
- Initial release (Closes: #33)

daemon.json (new file, 8 lines)

@ -0,0 +1,8 @@
{
"runtimes": {
"nvidia": {
"path": "/usr/bin/nvidia-container-runtime",
"runtimeArgs": []
}
}
}


debian/control (vendored, new file, 18 lines)

@ -0,0 +1,18 @@
Source: nvidia-docker2
Section: @SECTION@utils
Priority: optional
Maintainer: NVIDIA CORPORATION <cudatools@nvidia.com>
Standards-Version: 3.9.8
Homepage: https://github.com/NVIDIA/nvidia-docker/wiki
Vcs-Git: https://github.com/NVIDIA/nvidia-docker
Vcs-Browser: https://github.com/NVIDIA/nvidia-docker
Build-Depends: debhelper (>= 9)
Package: nvidia-docker2
Architecture: all
Breaks: nvidia-docker
Replaces: nvidia-docker
Depends: ${misc:Depends}, nvidia-container-runtime (= @RUNTIME_VERSION@), @DOCKER_VERSION@
Description: nvidia-docker CLI wrapper
Replaces nvidia-docker with a new implementation based on
nvidia-container-runtime

debian/copyright (vendored, new file, 35 lines)

@ -0,0 +1,35 @@
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: nvidia-docker2
Source: https://github.com/NVIDIA/nvidia-docker
Files: *
Copyright: 2017 NVIDIA CORPORATION <cudatools@nvidia.com>
License: BSD-3-Clause
License: BSD-3-clause
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
.
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
.
Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

debian/nvidia-docker2.install (vendored, new file, 2 lines)

@ -0,0 +1,2 @@
daemon.json /etc/docker
nvidia-* /usr/bin


@ -0,0 +1,2 @@
new-package-should-close-itp-bug
binary-without-manpage

debian/prepare (vendored, new executable file, 7 lines)

@ -0,0 +1,7 @@
#! /bin/sh
set -e
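# Substitute the build-time placeholders in debian/control; invoked as the dpkg-buildpackage hook from Dockerfile.xenial.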
sed -i "s;@SECTION@;${SECTION:+$SECTION/};g" debian/control
sed -i "s;@RUNTIME_VERSION@;${RUNTIME_VERSION};g" debian/control
sed -i "s;@DOCKER_VERSION@;${DOCKER_VERSION};g" debian/control

debian/rules (vendored, new executable file, 7 lines)

@ -0,0 +1,7 @@
#!/usr/bin/make -f
# -*- makefile -*-
#export DH_VERBOSE=1
%:
dh $@

nvidia-docker (new executable file, 31 lines)

@ -0,0 +1,31 @@
#! /bin/bash
# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
NV_DOCKER=${NV_DOCKER:-"docker"}
DOCKER_ARGS=""
NV_DOCKER_ARGS=""
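# Walk the arguments up to the first docker command; for run/create, inject the nvidia runtime and translate NV_GPU into NVIDIA_VISIBLE_DEVICES.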
while [ $# -gt 0 ]; do
arg=$1
shift
DOCKER_ARGS="$DOCKER_ARGS $arg"
case $arg in
run|create)
NV_DOCKER_ARGS="--runtime=nvidia"
if [ ! -z "$NV_GPU" ]; then
NV_DOCKER_ARGS="$NV_DOCKER_ARGS -e NVIDIA_VISIBLE_DEVICES=${NV_GPU// /,}"
fi
break
;;
version)
printf "NVIDIA Docker: 2.0.0\n"
break
;;
esac
done
if [ ! -z "$NV_DEBUG" ]; then
set -x
fi
$NV_DOCKER $DOCKER_ARGS $NV_DOCKER_ARGS "$@"


@ -1,4 +1,4 @@
Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions


@ -0,0 +1,39 @@
Name: nvidia-docker2
Version: %{version}
Release: %{release}
BuildArch: noarch
Group: Development Tools
Vendor: NVIDIA CORPORATION
Packager: NVIDIA CORPORATION <cudatools@nvidia.com>
Summary: nvidia-docker CLI wrapper
URL: https://github.com/NVIDIA/nvidia-docker
License: BSD
Source0: nvidia-docker
Source1: daemon.json
Source2: LICENSE
Conflicts: nvidia-docker
Requires: nvidia-container-runtime = %{runtime_version}
Requires: %{docker_version}
%description
Replaces nvidia-docker with a new implementation based on nvidia-container-runtime
%prep
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} .
%install
mkdir -p %{buildroot}%{_bindir}
install -m 755 -t %{buildroot}%{_bindir} nvidia-docker
mkdir -p %{buildroot}/etc/docker
install -m 644 -t %{buildroot}/etc/docker daemon.json
%files
%license LICENSE
%{_bindir}/nvidia-docker
/etc/docker/daemon.json
%changelog


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-centos7
RUN yum install -y \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/cache/yum/*
WORKDIR /usr/local/cuda/samples/1_Utilities/bandwidthTest
RUN make
CMD ./bandwidthTest --mode=shmoo


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-centos7
RUN yum install -y \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/cache/yum/*
WORKDIR /usr/local/cuda/samples/1_Utilities/deviceQuery
RUN make
CMD ./deviceQuery


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-centos7
RUN yum install -y \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/cache/yum/*
WORKDIR /usr/local/cuda/samples/0_Simple/matrixMulCUBLAS
RUN make
CMD ./matrixMulCUBLAS -sizemult=10


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-centos7
RUN yum install -y \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/cache/yum/*
WORKDIR /usr/local/cuda/samples/5_Simulations/nbody
RUN make
CMD ./nbody -benchmark


@ -1,3 +0,0 @@
FROM nvidia/cuda:8.0-devel-centos7
CMD nvidia-smi -q


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-centos7
RUN yum install -y \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/cache/yum/*
WORKDIR /usr/local/cuda/samples/0_Simple/vectorAdd
RUN make
CMD ./vectorAdd


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/lib/apt/lists/*
WORKDIR /usr/local/cuda/samples/1_Utilities/bandwidthTest
RUN make
CMD ./bandwidthTest --mode=shmoo


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/lib/apt/lists/*
WORKDIR /usr/local/cuda/samples/1_Utilities/deviceQuery
RUN make
CMD ./deviceQuery


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/lib/apt/lists/*
WORKDIR /usr/local/cuda/samples/0_Simple/matrixMulCUBLAS
RUN make
CMD ./matrixMulCUBLAS -sizemult=10


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/lib/apt/lists/*
WORKDIR /usr/local/cuda/samples/5_Simulations/nbody
RUN make
CMD ./nbody -benchmark


@ -1,3 +0,0 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
CMD nvidia-smi -q


@ -1,10 +0,0 @@
FROM nvidia/cuda:8.0-devel-ubuntu16.04
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-samples-$CUDA_PKG_VERSION && \
rm -rf /var/lib/apt/lists/*
WORKDIR /usr/local/cuda/samples/0_Simple/vectorAdd
RUN make
CMD ./vectorAdd


@ -1,82 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package cuda
// #cgo LDFLAGS: -lcudart_static -ldl -lrt
// #include <stdlib.h>
// #include <cuda_runtime_api.h>
import "C"
import (
"fmt"
"unsafe"
)
type handle struct{ dev C.int }
type deviceProp struct {
major int
minor int
multiProcessorCount uint
ECCEnabled bool
totalGlobalMem uint
sharedMemPerMultiprocessor uint
totalConstMem uint
l2CacheSize uint
memoryClockRate uint
memoryBusWidth uint
}
func errorString(ret C.cudaError_t) error {
if ret == C.cudaSuccess {
return nil
}
err := C.GoString(C.cudaGetErrorString(ret))
return fmt.Errorf("cuda: %v", err)
}
func driverGetVersion() (int, error) {
var driver C.int
r := C.cudaDriverGetVersion(&driver)
return int(driver), errorString(r)
}
func deviceGetByPCIBusId(busid string) (handle, error) {
var dev C.int
id := C.CString(busid)
r := C.cudaDeviceGetByPCIBusId(&dev, id)
C.free(unsafe.Pointer(id))
return handle{dev}, errorString(r)
}
func deviceCanAccessPeer(h1, h2 handle) (bool, error) {
var ok C.int
r := C.cudaDeviceCanAccessPeer(&ok, h1.dev, h2.dev)
return (ok != 0), errorString(r)
}
func deviceReset() error {
return errorString(C.cudaDeviceReset())
}
func (h handle) getDeviceProperties() (*deviceProp, error) {
var props C.struct_cudaDeviceProp
r := C.cudaGetDeviceProperties(&props, h.dev)
p := &deviceProp{
major: int(props.major),
minor: int(props.minor),
multiProcessorCount: uint(props.multiProcessorCount),
ECCEnabled: bool(props.ECCEnabled != 0),
totalGlobalMem: uint(props.totalGlobalMem),
sharedMemPerMultiprocessor: uint(props.sharedMemPerMultiprocessor),
totalConstMem: uint(props.totalConstMem),
l2CacheSize: uint(props.l2CacheSize),
memoryClockRate: uint(props.memoryClockRate),
memoryBusWidth: uint(props.memoryBusWidth),
}
return p, errorString(r)
}


@ -1,120 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package cuda
import (
"fmt"
)
type MemoryInfo struct {
ECC *bool
Global *uint
Shared *uint
Constant *uint
L2Cache *uint
Bandwidth *uint
}
type Device struct {
handle
Family *string
Arch *string
Cores *uint
Memory MemoryInfo
}
func archFamily(arch string) *string {
m := map[string]string{
"1": "Tesla",
"2": "Fermi",
"3": "Kepler",
"5": "Maxwell",
"6": "Pascal",
}
f, ok := m[arch[:1]]
if !ok {
return nil
}
return &f
}
func archSMCores(arch string) *uint {
m := map[string]uint{
"1.0": 8, // Tesla Generation (SM 1.0) G80 class
"1.1": 8, // Tesla Generation (SM 1.1) G8x G9x class
"1.2": 8, // Tesla Generation (SM 1.2) GT21x class
"1.3": 8, // Tesla Generation (SM 1.3) GT20x class
"2.0": 32, // Fermi Generation (SM 2.0) GF100 GF110 class
"2.1": 48, // Fermi Generation (SM 2.1) GF10x GF11x class
"3.0": 192, // Kepler Generation (SM 3.0) GK10x class
"3.2": 192, // Kepler Generation (SM 3.2) TK1 class
"3.5": 192, // Kepler Generation (SM 3.5) GK11x GK20x class
"3.7": 192, // Kepler Generation (SM 3.7) GK21x class
"5.0": 128, // Maxwell Generation (SM 5.0) GM10x class
"5.2": 128, // Maxwell Generation (SM 5.2) GM20x class
"5.3": 128, // Maxwell Generation (SM 5.3) TX1 class
"6.0": 64, // Pascal Generation (SM 6.0) GP100 class
"6.1": 128, // Pascal Generation (SM 6.1) GP10x class
"6.2": 128, // Pascal Generation (SM 6.2) GP10x class
}
c, ok := m[arch]
if !ok {
return nil
}
return &c
}
func GetDriverVersion() (string, error) {
d, err := driverGetVersion()
return fmt.Sprintf("%d.%d", d/1000, d%100/10), err
}
func NewDevice(busid string) (device *Device, err error) {
h, err := deviceGetByPCIBusId(busid)
if err != nil {
return nil, err
}
props, err := h.getDeviceProperties()
if err != nil {
return nil, err
}
arch := fmt.Sprintf("%d.%d", props.major, props.minor)
family := archFamily(arch)
cores := archSMCores(arch)
bw := 2 * (props.memoryClockRate / 1000) * (props.memoryBusWidth / 8)
// Destroy the active CUDA context
if err := deviceReset(); err != nil {
return nil, err
}
device = &Device{
handle: h,
Family: family,
Arch: &arch,
Cores: cores,
Memory: MemoryInfo{
ECC: &props.ECCEnabled,
Global: &props.totalGlobalMem,
Shared: &props.sharedMemPerMultiprocessor,
Constant: &props.totalConstMem,
L2Cache: &props.l2CacheSize,
Bandwidth: &bw, // MB/s
},
}
if cores != nil {
*device.Cores *= props.multiProcessorCount
}
*device.Memory.Global /= 1024 * 1024 // MiB
*device.Memory.Shared /= 1024 // KiB
*device.Memory.Constant /= 1024 // KiB
*device.Memory.L2Cache /= 1024 // KiB
return
}
func CanAccessPeer(dev1, dev2 *Device) (bool, error) {
return deviceCanAccessPeer(dev1.handle, dev2.handle)
}
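As a worked example of the bandwidth formula in NewDevice above (illustrative figures, not taken from this repository): a board with a 2505 MHz memory clock (cudaGetDeviceProperties reports memoryClockRate in kHz, hence the division by 1000) and a 384-bit bus gives 2 * 2505 * (384 / 8) = 240480 MB/s, roughly 240 GB/s; the factor of 2 accounts for the double data rate of GDDR memory. The global, shared, constant and L2 figures are then rescaled to MiB and KiB right after the struct is filled.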

View File

@ -1,234 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package docker
import (
"bytes"
"encoding/json"
"fmt"
"os"
"os/exec"
"strings"
"syscall"
)
var dockerCmd = []string{"docker"}
func SetCommand(cmd ...string) {
if len(cmd) > 0 {
dockerCmd = cmd
}
}
func docker(stdout bool, command string, arg ...string) (b []byte, err error) {
var buf bytes.Buffer
args := append(append(dockerCmd[1:], command), arg...)
cmd := exec.Command(dockerCmd[0], args...)
cmd.Stderr = &buf
if stdout {
cmd.Stdout = os.Stderr
err = cmd.Run()
} else {
b, err = cmd.Output()
}
if err != nil {
b = bytes.TrimSpace(buf.Bytes())
b = bytes.TrimPrefix(b, []byte("Error: "))
if len(b) > 0 {
return nil, fmt.Errorf("%s", b)
} else {
return nil, fmt.Errorf("failed to run docker command")
}
}
return b, nil
}
// List of boolean options: https://github.com/docker/docker/blob/17.03.x/contrib/completion/bash/docker
var lastSupportedVersion = "17.03"
var booleanFlags = map[string]map[string][]string{
"1.9": {
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
"-disable-legacy-registry", "-help", "-icc", "-ip-forward",
"-ip-masq", "-iptables", "-ipv6", "-selinux-enabled", "-userland-proxy"},
"create": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
"run": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
"-detach", "d", "-rm", "-sig-proxy"},
},
"1.10": {
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
"-disable-legacy-registry", "-help", "-icc", "-ip-forward",
"-ip-masq", "-iptables", "-ipv6", "-selinux-enabled", "-userland-proxy"},
"create": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
"run": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
"-detach", "d", "-rm", "-sig-proxy"},
},
"1.11": {
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
"-disable-legacy-registry", "-help", "-icc", "-ip-forward",
"-ip-masq", "-iptables", "-ipv6", "-raw-logs", "-selinux-enabled", "-userland-proxy"},
"create": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
"run": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
"-detach", "d", "-rm", "-sig-proxy"},
},
"1.12": {
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
"-disable-legacy-registry", "-help", "-icc", "-ip-forward",
"-ip-masq", "-iptables", "-ipv6", "-live-restore", "-raw-logs",
"-selinux-enabled", "-userland-proxy"},
"create": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
"run": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
"-detach", "d", "-no-healthcheck", "-rm", "-sig-proxy"},
},
"1.13": {
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
"-disable-legacy-registry", "-experimental", "-help", "-icc", "-init", "-ip-forward",
"-ip-masq", "-iptables", "-ipv6", "-live-restore", "-raw-logs",
"-selinux-enabled", "-userland-proxy"},
"create": []string{"-disable-content-trust", "-help", "-init", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
"run": []string{"-disable-content-trust", "-help", "-init", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
"-detach", "d", "-no-healthcheck", "-rm", "-sig-proxy"},
},
lastSupportedVersion: {
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
"-disable-legacy-registry", "-experimental", "-help", "-icc", "-init", "-ip-forward",
"-ip-masq", "-iptables", "-ipv6", "-live-restore", "-raw-logs",
"-selinux-enabled", "-userland-proxy"},
"create": []string{"-disable-content-trust", "-help", "-init", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
"run": []string{"-disable-content-trust", "-help", "-init", "-interactive", "i", "-oom-kill-disable",
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
"-detach", "d", "-no-healthcheck", "-rm", "-sig-proxy"},
},
}
func ParseArgs(args []string, cmd ...string) (string, int, error) {
if len(cmd) == 0 {
cmd = append(cmd, "")
}
version, err := ClientVersion()
if err != nil {
return "", -1, err
}
vmaj := version[:strings.LastIndex(version, ".")]
cmdBooleanFlags, ok := booleanFlags[vmaj][cmd[0]]
if !ok {
// Docker is newer than the last supported version: use the flags from the most recent version we know.
cmdBooleanFlags, _ = booleanFlags[lastSupportedVersion][cmd[0]]
}
// Build the set of boolean Docker options for this command
type void struct{}
flags := make(map[string]void)
for _, f := range cmdBooleanFlags {
flags[f] = void{}
}
for i := 0; i < len(args); i++ {
arg := args[i]
if arg[0] != '-' || arg == "-" {
return args[i], i, nil
}
// Skip if current flag is in the form --option=value
// Note: doesn't handle weird commands like `nvidia-docker run -vit=XYZ /tmp:/bar ubuntu`
if strings.Contains(arg, "=") {
continue
}
arg = arg[1:]
if arg[0] == '-' {
// Long option: skip next argument if option is not boolean
if _, ok := flags[arg]; !ok {
i++
}
} else {
// Short options: skip next argument if any option is not boolean
for _, f := range arg {
if _, ok := flags[string(f)]; !ok {
i++
break
}
}
}
}
return "", -1, nil
}
func Label(image, label string) (string, error) {
format := fmt.Sprintf(`--format={{index .Config.Labels "%s"}}`, label)
b, err := docker(false, "inspect", format, image)
if err != nil {
return "", err
}
return string(bytes.Trim(b, " \n")), nil
}
func VolumeInspect(name string) (string, error) {
var vol []struct{ Name, Driver, Mountpoint string }
b, err := docker(false, "volume", "inspect", name)
if err != nil {
return "", err
}
if err := json.Unmarshal(b, &vol); err != nil {
return "", err
}
return vol[0].Mountpoint, nil
}
func ImageExists(image string) (bool, error) {
_, err := docker(false, "inspect", "--type=image", image)
if err != nil {
// We cannot tell whether the image is missing or the daemon is unreachable.
return false, nil
}
return true, nil
}
func ImagePull(image string) error {
_, err := docker(true, "pull", image)
return err
}
func ClientVersion() (string, error) {
b, err := docker(false, "version", "--format", "{{.Client.Version}}")
if err != nil {
return "", err
}
version := string(b)
var v1, v2, v3 int
if _, err := fmt.Sscanf(version, "%d.%d.%d", &v1, &v2, &v3); err != nil {
return "", err
}
return version, nil
}
func Docker(arg ...string) error {
cmd, err := exec.LookPath(dockerCmd[0])
if err != nil {
return err
}
args := append(dockerCmd, arg...)
return syscall.Exec(cmd, args, os.Environ())
}
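A minimal usage sketch of the flag-skipping logic in ParseArgs above (illustrative only, not part of the original sources; it assumes a local Docker client is installed, since ParseArgs calls ClientVersion under the hood):

package main

import (
	"fmt"
	"log"

	"github.com/NVIDIA/nvidia-docker/src/docker"
)

func main() {
	docker.SetCommand("docker")
	// "--log-level" is not a boolean flag, so its value "debug" is skipped and
	// the first positional argument "run" is returned along with its offset.
	command, off, err := docker.ParseArgs([]string{"--log-level", "debug", "run", "-it", "ubuntu"})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(command, off) // run 2
}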

View File

@ -1,104 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package graceful
import (
"net"
"net/http"
"os"
"sync"
"time"
middleware "github.com/justinas/alice"
"gopkg.in/tylerb/graceful.v1"
)
const timeout = 5 * time.Second
type HTTPServer struct {
sync.Mutex
network string
router *http.ServeMux
server *graceful.Server
err error
}
func recovery(handler http.Handler) http.Handler {
f := func(w http.ResponseWriter, r *http.Request) {
defer func() {
if recover() != nil {
http.Error(w, "internal error, check logs for details", http.StatusInternalServerError)
}
}()
handler.ServeHTTP(w, r)
}
return http.HandlerFunc(f)
}
func NewHTTPServer(net, addr string, mw ...middleware.Constructor) *HTTPServer {
r := http.NewServeMux()
return &HTTPServer{
network: net,
router: r,
server: &graceful.Server{
Timeout: timeout,
Server: &http.Server{
Addr: addr,
Handler: middleware.New(recovery).Append(mw...).Then(r),
ReadTimeout: timeout,
WriteTimeout: timeout,
},
},
}
}
func (s *HTTPServer) Handle(method, route string, handler http.HandlerFunc) {
f := func(w http.ResponseWriter, r *http.Request) {
if r.Method != method {
http.NotFound(w, r)
return
}
handler.ServeHTTP(w, r)
}
s.router.HandleFunc(route, f)
}
func (s *HTTPServer) Serve() <-chan struct{} {
if s.network == "unix" {
os.Remove(s.server.Addr)
}
l, err := net.Listen(s.network, s.server.Addr)
if err != nil {
s.Lock()
s.err = err
s.Unlock()
c := make(chan struct{})
close(c)
return c
}
c := s.server.StopChan()
go func() {
s.Lock()
defer s.Unlock()
err = s.server.Serve(l)
// Ignore the "accept" error triggered by Stop() closing the listener; record anything else.
if e, ok := err.(*net.OpError); !ok || e.Op != "accept" {
s.err = err
}
}()
return c
}
func (s *HTTPServer) Stop() {
s.server.Stop(timeout)
}
func (s *HTTPServer) Error() error {
s.Lock()
defer s.Unlock()
return s.err
}
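A short, hypothetical usage sketch of this wrapper (the address and route are assumptions, not from the original tree):

package main

import (
	"log"
	"net/http"

	"github.com/NVIDIA/nvidia-docker/src/graceful"
)

func main() {
	s := graceful.NewHTTPServer("tcp", "localhost:3476")
	s.Handle("GET", "/ping", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte("pong\n"))
	})
	done := s.Serve() // the channel is closed once the listener has shut down
	s.Stop()          // request a graceful shutdown (in a real program, typically on SIGTERM)
	<-done
	if err := s.Error(); err != nil {
		log.Fatal(err)
	}
}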

View File

@ -1,196 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package ldcache
import (
"bytes"
"encoding/binary"
"errors"
"os"
"path/filepath"
"syscall"
"unsafe"
)
const ldcachePath = "/etc/ld.so.cache"
const (
magicString1 = "ld.so-1.7.0"
magicString2 = "glibc-ld.so.cache"
magicVersion = "1.1"
)
const (
flagTypeMask = 0x00ff
flagTypeELF = 0x0001
flagArchMask = 0xff00
flagArchI386 = 0x0000
flagArchX8664 = 0x0300
flagArchX32 = 0x0800
flagArchPpc64le = 0x0500
)
var ErrInvalidCache = errors.New("invalid ld.so.cache file")
type Header1 struct {
Magic [len(magicString1) + 1]byte // include null delimiter
NLibs uint32
}
type Entry1 struct {
Flags int32
Key, Value uint32
}
type Header2 struct {
Magic [len(magicString2)]byte
Version [len(magicVersion)]byte
NLibs uint32
TableSize uint32
_ [3]uint32 // unused
_ uint64 // force 8 byte alignment
}
type Entry2 struct {
Flags int32
Key, Value uint32
OSVersion uint32
HWCap uint64
}
type LDCache struct {
*bytes.Reader
data, libs []byte
header Header2
entries []Entry2
}
func Open() (*LDCache, error) {
f, err := os.Open(ldcachePath)
if err != nil {
return nil, err
}
defer f.Close()
fi, err := f.Stat()
if err != nil {
return nil, err
}
d, err := syscall.Mmap(int(f.Fd()), 0, int(fi.Size()),
syscall.PROT_READ, syscall.MAP_PRIVATE)
if err != nil {
return nil, err
}
cache := &LDCache{data: d, Reader: bytes.NewReader(d)}
return cache, cache.parse()
}
func (c *LDCache) Close() error {
return syscall.Munmap(c.data)
}
func (c *LDCache) Magic() string {
return string(c.header.Magic[:])
}
func (c *LDCache) Version() string {
return string(c.header.Version[:])
}
func strn(b []byte, n int) string {
return string(b[:n])
}
func (c *LDCache) parse() error {
var header Header1
// Check for the old format (< glibc-2.2)
if c.Len() <= int(unsafe.Sizeof(header)) {
return ErrInvalidCache
}
if strn(c.data, len(magicString1)) == magicString1 {
if err := binary.Read(c, binary.LittleEndian, &header); err != nil {
return err
}
n := int64(header.NLibs) * int64(unsafe.Sizeof(Entry1{}))
offset, err := c.Seek(n, 1) // skip old entries
if err != nil {
return err
}
n = (-offset) & int64(unsafe.Alignof(c.header)-1)
_, err = c.Seek(n, 1) // skip padding
if err != nil {
return err
}
}
c.libs = c.data[c.Size()-int64(c.Len()):] // kv offsets start here
if err := binary.Read(c, binary.LittleEndian, &c.header); err != nil {
return err
}
if c.Magic() != magicString2 || c.Version() != magicVersion {
return ErrInvalidCache
}
c.entries = make([]Entry2, c.header.NLibs)
if err := binary.Read(c, binary.LittleEndian, &c.entries); err != nil {
return err
}
return nil
}
func (c *LDCache) Lookup(libs ...string) (paths32, paths64 []string) {
type void struct{}
var paths *[]string
set := make(map[string]void)
prefix := make([][]byte, len(libs))
for i := range libs {
prefix[i] = []byte(libs[i])
}
for _, e := range c.entries {
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
continue
}
switch e.Flags & flagArchMask {
case flagArchX8664:
fallthrough
case flagArchPpc64le:
paths = &paths64
case flagArchX32:
fallthrough
case flagArchI386:
paths = &paths32
default:
continue
}
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
continue
}
lib := c.libs[e.Key:]
value := c.libs[e.Value:]
for _, p := range prefix {
if bytes.HasPrefix(lib, p) {
n := bytes.IndexByte(value, 0)
if n < 0 {
break
}
path, err := filepath.EvalSymlinks(strn(value, n))
if err != nil {
break
}
if _, ok := set[path]; ok {
break
}
set[path] = void{}
*paths = append(*paths, path)
break
}
}
}
return
}
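A usage sketch (an assumption, not part of the original sources): resolve the 64-bit library paths for a couple of NVIDIA libraries through the linker cache; it requires a readable /etc/ld.so.cache.

package main

import (
	"fmt"
	"log"

	"github.com/NVIDIA/nvidia-docker/src/ldcache"
)

func main() {
	cache, err := ldcache.Open()
	if err != nil {
		log.Fatal(err)
	}
	defer cache.Close()

	_, libs64 := cache.Lookup("libcuda.so", "libnvidia-ml.so")
	for _, p := range libs64 {
		fmt.Println(p) // e.g. /usr/lib/x86_64-linux-gnu/libcuda.so.384.98 (hypothetical path)
	}
}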

View File

@ -1,110 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"flag"
"fmt"
"log"
"os"
"runtime"
"runtime/debug"
"github.com/NVIDIA/nvidia-docker/src/nvidia"
)
var (
PrintVersion bool
ListenAddr string
VolumesPath string
SocketPath string
Version string
Devices []nvidia.Device
Volumes nvidia.VolumeMap
)
func init() {
log.SetPrefix(os.Args[0] + " | ")
flag.BoolVar(&PrintVersion, "v", false, "Show the plugin version information")
flag.StringVar(&ListenAddr, "l", "localhost:3476", "Server listen address")
flag.StringVar(&VolumesPath, "d", "/var/lib/nvidia-docker/volumes", "Path where to store the volumes")
flag.StringVar(&SocketPath, "s", "/run/docker/plugins", "Path to the plugin socket")
}
func assert(err error) {
if err != nil {
log.Panicln("Error:", err)
}
}
func exit() {
if err := recover(); err != nil {
if _, ok := err.(runtime.Error); ok {
log.Println(err)
}
if os.Getenv("NV_DEBUG") != "" {
log.Printf("%s", debug.Stack())
}
os.Exit(1)
}
os.Exit(0)
}
func main() {
var err error
flag.Parse()
defer exit()
if PrintVersion {
fmt.Printf("NVIDIA Docker plugin: %s\n", Version)
return
}
log.Println("Loading NVIDIA unified memory")
assert(nvidia.LoadUVM())
log.Println("Loading NVIDIA management library")
assert(nvidia.Init())
defer func() { assert(nvidia.Shutdown()) }()
log.Println("Discovering GPU devices")
Devices, err = nvidia.LookupDevices()
assert(err)
log.Println("Provisioning volumes at", VolumesPath)
Volumes, err = nvidia.LookupVolumes(VolumesPath)
assert(err)
plugin := NewPluginAPI(SocketPath)
remote := NewRemoteAPI(ListenAddr)
log.Println("Serving plugin API at", SocketPath)
log.Println("Serving remote API at", ListenAddr)
p := plugin.Serve()
r := remote.Serve()
join, joined := make(chan int, 2), 0
L:
for {
select {
case <-p:
remote.Stop()
p = nil
join <- 1
case <-r:
plugin.Stop()
r = nil
join <- 1
case j := <-join:
if joined += j; joined == cap(join) {
break L
}
}
}
assert(plugin.Error())
assert(remote.Error())
log.Println("Successfully terminated")
}

View File

@ -1,75 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"encoding/json"
"log"
"net/http"
"os"
"path"
"github.com/NVIDIA/nvidia-docker/src/graceful"
"github.com/NVIDIA/nvidia-docker/src/nvidia"
)
const socketName = nvidia.DockerPlugin + ".sock"
type plugin interface {
implement() string
register(*PluginAPI)
}
type PluginAPI struct {
*graceful.HTTPServer
plugins []plugin
}
func accept(handler http.Handler) http.Handler {
f := func(w http.ResponseWriter, r *http.Request) {
h := r.Header.Get("Accept")
if h != "application/vnd.docker.plugins.v1.1+json" &&
h != "application/vnd.docker.plugins.v1.2+json" {
log.Println("Unsupported plugin API", h)
w.WriteHeader(http.StatusNotAcceptable)
return
}
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
handler.ServeHTTP(w, r)
}
return http.HandlerFunc(f)
}
func NewPluginAPI(prefix string) *PluginAPI {
os.MkdirAll(prefix, 0700)
a := &PluginAPI{
HTTPServer: graceful.NewHTTPServer("unix", path.Join(prefix, socketName), accept),
}
a.Handle("POST", "/Plugin.Activate", a.activate)
a.register(
new(pluginVolume),
)
return a
}
func (a *PluginAPI) register(plugins ...plugin) {
for _, p := range plugins {
p.register(a)
a.plugins = append(a.plugins, p)
}
}
func (a *PluginAPI) activate(resp http.ResponseWriter, req *http.Request) {
r := struct{ Implements []string }{}
log.Println("Received activate request")
r.Implements = make([]string, len(a.plugins))
for i, p := range a.plugins {
r.Implements[i] = p.implement()
}
assert(json.NewEncoder(resp).Encode(r))
log.Println("Plugins activated", r.Implements)
}

View File

@ -1,207 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"encoding/json"
"errors"
"fmt"
"log"
"net/http"
"path"
"regexp"
"github.com/NVIDIA/nvidia-docker/src/nvidia"
)
var (
ErrVolumeBadFormat = errors.New("bad volume format")
ErrVolumeUnsupported = errors.New("unsupported volume")
ErrVolumeNotFound = errors.New("no such volume")
ErrVolumeVersion = errors.New("invalid volume version")
)
type pluginVolume struct{}
func (p *pluginVolume) implement() string { return "VolumeDriver" }
func (p *pluginVolume) register(api *PluginAPI) {
prefix := "/" + p.implement()
api.Handle("POST", prefix+".Create", p.create)
api.Handle("POST", prefix+".Remove", p.remove)
api.Handle("POST", prefix+".Mount", p.mount)
api.Handle("POST", prefix+".Unmount", p.unmount)
api.Handle("POST", prefix+".Path", p.path)
api.Handle("POST", prefix+".Get", p.get)
api.Handle("POST", prefix+".List", p.list)
api.Handle("POST", prefix+".Capabilities", p.capabilities)
}
func fmtError(err error, vol string) *string {
s := fmt.Sprintf("%v: %s", err, vol)
return &s
}
func getVolume(name string) (*nvidia.Volume, string, error) {
re := regexp.MustCompile("^([a-zA-Z0-9_.-]+)_([0-9.]+)$")
m := re.FindStringSubmatch(name)
if len(m) != 3 {
return nil, "", ErrVolumeBadFormat
}
volume, version := Volumes[m[1]], m[2]
if volume == nil {
return nil, "", ErrVolumeUnsupported
}
return volume, version, nil
}
func (p *pluginVolume) create(resp http.ResponseWriter, req *http.Request) {
var q struct{ Name string }
var r struct{ Err *string }
assert(json.NewDecoder(req.Body).Decode(&q))
log.Printf("Received create request for volume '%s'\n", q.Name)
volume, version, err := getVolume(q.Name)
if err != nil {
r.Err = fmtError(err, q.Name)
assert(json.NewEncoder(resp).Encode(r))
return
}
// The volume version requested needs to match the volume version in cache
if version != volume.Version {
r.Err = fmtError(ErrVolumeVersion, q.Name)
assert(json.NewEncoder(resp).Encode(r))
return
}
ok, err := volume.Exists()
assert(err)
if !ok {
assert(volume.Create(nvidia.LinkStrategy{}))
}
assert(json.NewEncoder(resp).Encode(r))
}
func (p *pluginVolume) remove(resp http.ResponseWriter, req *http.Request) {
var q struct{ Name string }
var r struct{ Err *string }
assert(json.NewDecoder(req.Body).Decode(&q))
log.Printf("Received remove request for volume '%s'\n", q.Name)
volume, version, err := getVolume(q.Name)
if err != nil {
r.Err = fmtError(err, q.Name)
} else {
assert(volume.Remove(version))
}
assert(json.NewEncoder(resp).Encode(r))
}
func (p *pluginVolume) mount(resp http.ResponseWriter, req *http.Request) {
var q struct{ Name string }
var r struct{ Mountpoint, Err *string }
assert(json.NewDecoder(req.Body).Decode(&q))
log.Printf("Received mount request for volume '%s'\n", q.Name)
volume, version, err := getVolume(q.Name)
if err != nil {
r.Err = fmtError(err, q.Name)
assert(json.NewEncoder(resp).Encode(r))
return
}
ok, err := volume.Exists(version)
assert(err)
if !ok {
r.Err = fmtError(ErrVolumeNotFound, q.Name)
} else {
p := path.Join(volume.Path, version)
r.Mountpoint = &p
}
assert(json.NewEncoder(resp).Encode(r))
}
func (p *pluginVolume) unmount(resp http.ResponseWriter, req *http.Request) {
var q struct{ Name string }
var r struct{ Err *string }
assert(json.NewDecoder(req.Body).Decode(&q))
log.Printf("Received unmount request for volume '%s'\n", q.Name)
_, _, err := getVolume(q.Name)
if err != nil {
r.Err = fmtError(err, q.Name)
}
assert(json.NewEncoder(resp).Encode(r))
}
func (p *pluginVolume) path(resp http.ResponseWriter, req *http.Request) {
p.mount(resp, req)
}
func (p *pluginVolume) get(resp http.ResponseWriter, req *http.Request) {
type Volume struct{ Name, Mountpoint string }
var q struct{ Name string }
var r struct {
Volume *Volume
Err *string
}
assert(json.NewDecoder(req.Body).Decode(&q))
volume, version, err := getVolume(q.Name)
if err != nil {
r.Err = fmtError(err, q.Name)
assert(json.NewEncoder(resp).Encode(r))
return
}
ok, err := volume.Exists(version)
assert(err)
if !ok {
r.Err = fmtError(ErrVolumeNotFound, q.Name)
} else {
r.Volume = &Volume{
Name: q.Name,
Mountpoint: path.Join(volume.Path, version),
}
}
assert(json.NewEncoder(resp).Encode(r))
}
func (p *pluginVolume) list(resp http.ResponseWriter, req *http.Request) {
type Volume struct{ Name, Mountpoint string }
var r struct {
Volumes []Volume
Err *string
}
for _, vol := range Volumes {
versions, err := vol.ListVersions()
assert(err)
for _, v := range versions {
r.Volumes = append(r.Volumes, Volume{
Name: fmt.Sprintf("%s_%s", vol.Name, v),
Mountpoint: path.Join(vol.Path, v),
})
}
}
assert(json.NewEncoder(resp).Encode(r))
}
func (p *pluginVolume) capabilities(resp http.ResponseWriter, req *http.Request) {
type Capabilities struct{ Scope string }
var r struct {
Capabilities Capabilities
}
r.Capabilities = Capabilities{
Scope: "local",
}
assert(json.NewEncoder(resp).Encode(r))
}
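The volume names handled by this driver follow the "<name>_<version>" convention enforced by getVolume; the standalone sketch below mirrors that regular expression (the driver version 367.57 is only an example):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile("^([a-zA-Z0-9_.-]+)_([0-9.]+)$")
	m := re.FindStringSubmatch("nvidia_driver_367.57")
	fmt.Println(m[1], m[2]) // nvidia_driver 367.57
}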

View File

@ -1,58 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"net/http"
"github.com/NVIDIA/nvidia-docker/src/graceful"
)
type restapi interface {
version() string
gpuInfo(http.ResponseWriter, *http.Request)
gpuInfoJSON(http.ResponseWriter, *http.Request)
gpuStatus(http.ResponseWriter, *http.Request)
gpuStatusJSON(http.ResponseWriter, *http.Request)
dockerCLI(http.ResponseWriter, *http.Request)
dockerCLIJSON(http.ResponseWriter, *http.Request)
mesosCLI(http.ResponseWriter, *http.Request)
}
type RemoteAPI struct {
*graceful.HTTPServer
apis []restapi
}
func NewRemoteAPI(addr string) *RemoteAPI {
a := &RemoteAPI{
HTTPServer: graceful.NewHTTPServer("tcp", addr),
}
a.register(
new(remoteV10),
)
return a
}
func (a *RemoteAPI) register(apis ...restapi) {
for i, api := range apis {
prefix := "/" + api.version()
handlers:
a.Handle("GET", prefix+"/gpu/info", api.gpuInfo)
a.Handle("GET", prefix+"/gpu/info/json", api.gpuInfoJSON)
a.Handle("GET", prefix+"/gpu/status", api.gpuStatus)
a.Handle("GET", prefix+"/gpu/status/json", api.gpuStatusJSON)
a.Handle("GET", prefix+"/docker/cli", api.dockerCLI)
a.Handle("GET", prefix+"/docker/cli/json", api.dockerCLIJSON)
a.Handle("GET", prefix+"/mesos/cli", api.mesosCLI)
if i == len(apis)-1 && prefix != "" {
prefix = ""
goto handlers
}
a.apis = append(a.apis, api)
}
}

View File

@ -1,283 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"bytes"
"compress/zlib"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"text/tabwriter"
"text/template"
"github.com/NVIDIA/nvidia-docker/src/nvidia"
)
type remoteV10 struct{}
func (r *remoteV10) version() string { return "v1.0" }
func (r *remoteV10) gpuInfo(resp http.ResponseWriter, req *http.Request) {
const tpl = `
Driver version: {{driverVersion}}
Supported CUDA version: {{cudaVersion}}
{{range $i, $e := .}}
Device #{{$i}}
Model: {{or .Model "N/A"}}
UUID: {{.UUID}}
Path: {{.Path}}
Family: {{or .Family "N/A"}}
Arch: {{or .Arch "N/A"}}
Cores: {{or .Cores "N/A"}}
Power: {{if .Power}}{{.Power}} W{{else}}N/A{{end}}
CPU Affinity: {{if .CPUAffinity}}NUMA node{{.CPUAffinity}}{{else}}N/A{{end}}
PCI
Bus ID: {{.PCI.BusID}}
BAR1: {{if .PCI.BAR1}}{{.PCI.BAR1}} MiB{{else}}N/A{{end}}
Bandwidth: {{if .PCI.Bandwidth}}{{.PCI.Bandwidth}} MB/s{{else}}N/A{{end}}
Memory
ECC: {{or .Memory.ECC "N/A"}}
Global: {{if .Memory.Global}}{{.Memory.Global}} MiB{{else}}N/A{{end}}
Constant: {{if .Memory.Constant}}{{.Memory.Constant}} KiB{{else}}N/A{{end}}
Shared: {{if .Memory.Shared}}{{.Memory.Shared}} KiB{{else}}N/A{{end}}
L2 Cache: {{if .Memory.L2Cache}}{{.Memory.L2Cache}} KiB{{else}}N/A{{end}}
Bandwidth: {{if .Memory.Bandwidth}}{{.Memory.Bandwidth}} MB/s{{else}}N/A{{end}}
Clocks
Cores: {{if .Clocks.Cores}}{{.Clocks.Cores}} MHz{{else}}N/A{{end}}
Memory: {{if .Clocks.Memory}}{{.Clocks.Memory}} MHz{{else}}N/A{{end}}
P2P Available{{if not .Topology}}: None{{else}}{{range .Topology}}
{{.BusID}} - {{(.Link.String)}}{{end}}{{end}}
{{end}}
`
m := template.FuncMap{
"driverVersion": nvidia.GetDriverVersion,
"cudaVersion": nvidia.GetCUDAVersion,
}
t := template.Must(template.New("").Funcs(m).Parse(tpl))
w := tabwriter.NewWriter(resp, 0, 4, 0, ' ', 0)
assert(t.Execute(w, Devices))
assert(w.Flush())
}
func (r *remoteV10) gpuInfoJSON(resp http.ResponseWriter, req *http.Request) {
var body bytes.Buffer
writeGPUInfoJSON(&body)
resp.Header().Set("Content-Type", "application/json")
_, err := body.WriteTo(resp)
assert(err)
}
func writeGPUInfoJSON(wr io.Writer) {
var err error
r := struct {
Version struct{ Driver, CUDA string }
Devices []nvidia.Device
}{
Devices: Devices,
}
r.Version.Driver, err = nvidia.GetDriverVersion()
assert(err)
r.Version.CUDA, err = nvidia.GetCUDAVersion()
assert(err)
assert(json.NewEncoder(wr).Encode(r))
}
func (r *remoteV10) gpuStatus(resp http.ResponseWriter, req *http.Request) {
const tpl = `{{range $i, $e := .}}{{$s := (.Status)}}
Device #{{$i}}
Power: {{if and $s.Power .Power}}{{$s.Power}} / {{.Power}} W{{else}}N/A{{end}}
Temperature: {{if $s.Temperature}}{{$s.Temperature}} °C{{else}}N/A{{end}}
Utilization
GPU: {{if $s.Utilization.GPU}}{{$s.Utilization.GPU}} %{{else}}N/A{{end}}
Memory: {{if $s.Utilization.Memory}}{{$s.Utilization.Memory}} %{{else}}N/A{{end}}
Encoder: {{if $s.Utilization.Encoder}}{{$s.Utilization.Encoder}} %{{else}}N/A{{end}}
Decoder: {{if $s.Utilization.Decoder}}{{$s.Utilization.Decoder}} %{{else}}N/A{{end}}
Memory
Global: {{if and $s.Memory.GlobalUsed .Memory.Global}}{{$s.Memory.GlobalUsed}} / {{.Memory.Global}} MiB{{else}}N/A{{end}}
ECC Errors
L1 Cache: {{or $s.Memory.ECCErrors.L1Cache "N/A"}}
L2 Cache: {{or $s.Memory.ECCErrors.L2Cache "N/A"}}
Global: {{or $s.Memory.ECCErrors.Global "N/A"}}
PCI
BAR1: {{if and $s.PCI.BAR1Used .PCI.BAR1}}{{$s.PCI.BAR1Used}} / {{.PCI.BAR1}} MiB{{else}}N/A{{end}}
Throughput
RX: {{if $s.PCI.Throughput.RX}}{{$s.PCI.Throughput.RX}} MB/s{{else}}N/A{{end}}
TX: {{if $s.PCI.Throughput.TX}}{{$s.PCI.Throughput.TX}} MB/s{{else}}N/A{{end}}
Clocks
Cores: {{if $s.Clocks.Cores}}{{$s.Clocks.Cores}} MHz{{else}}N/A{{end}}
Memory: {{if $s.Clocks.Memory}}{{$s.Clocks.Memory}} MHz{{else}}N/A{{end}}
Processes{{if not $s.Processes}}: None{{else}}{{range $s.Processes}}
- PID: {{.PID}}
Name: {{.Name}}
Memory: {{.MemoryUsed}} MiB{{end}}{{end}}
{{end}}
`
t := template.Must(template.New("").Parse(tpl))
w := tabwriter.NewWriter(resp, 0, 4, 0, ' ', 0)
assert(t.Execute(w, Devices))
assert(w.Flush())
}
func (r *remoteV10) gpuStatusJSON(resp http.ResponseWriter, req *http.Request) {
var body bytes.Buffer
writeGPUStatusJSON(&body)
resp.Header().Set("Content-Type", "application/json")
_, err := body.WriteTo(resp)
assert(err)
}
func writeGPUStatusJSON(wr io.Writer) {
status := make([]*nvidia.DeviceStatus, 0, len(Devices))
for i := range Devices {
s, err := Devices[i].Status()
assert(err)
status = append(status, s)
}
r := struct{ Devices []*nvidia.DeviceStatus }{status}
assert(json.NewEncoder(wr).Encode(r))
}
func (r *remoteV10) dockerCLI(resp http.ResponseWriter, req *http.Request) {
const tpl = "--volume-driver={{.VolumeDriver}}{{range .Volumes}} --volume={{.}}{{end}}" +
"{{range .Devices}} --device={{.}}{{end}}"
devs := strings.Split(req.FormValue("dev"), " ")
vols := strings.Split(req.FormValue("vol"), " ")
args, err := dockerCLIArgs(devs, vols)
if err != nil {
http.Error(resp, err.Error(), http.StatusBadRequest)
return
}
t := template.Must(template.New("").Parse(tpl))
assert(t.Execute(resp, args))
}
func (r *remoteV10) dockerCLIJSON(resp http.ResponseWriter, req *http.Request) {
devs := strings.Split(req.FormValue("dev"), " ")
vols := strings.Split(req.FormValue("vol"), " ")
args, err := dockerCLIArgs(devs, vols)
if err != nil {
http.Error(resp, err.Error(), http.StatusBadRequest)
return
}
resp.Header().Set("Content-Type", "application/json")
assert(json.NewEncoder(resp).Encode(args))
}
type dockerArgs struct {
VolumeDriver string
Volumes []string
Devices []string
}
func dockerCLIArgs(devs, vols []string) (*dockerArgs, error) {
cdevs, err := nvidia.GetControlDevicePaths()
if err != nil {
return nil, err
}
devs, err = dockerCLIDevices(devs)
if err != nil {
return nil, err
}
vols, err = dockerCLIVolumes(vols)
if err != nil {
return nil, err
}
return &dockerArgs{
VolumeDriver: nvidia.DockerPlugin,
Volumes: vols,
Devices: append(cdevs, devs...),
}, nil
}
func dockerCLIDevices(ids []string) ([]string, error) {
devs := make([]string, 0, len(Devices))
if len(ids) == 1 && (ids[0] == "*" || ids[0] == "") {
for i := range Devices {
devs = append(devs, Devices[i].Path)
}
} else {
d, err := nvidia.FilterDevices(Devices, ids)
if err != nil {
return nil, err
}
for i := range d {
devs = append(devs, d[i].Path)
}
}
return devs, nil
}
func dockerCLIVolumes(names []string) ([]string, error) {
vols := make([]string, 0, len(Volumes))
drv, err := nvidia.GetDriverVersion()
if err != nil {
return nil, err
}
if len(names) == 1 && (names[0] == "*" || names[0] == "") {
for _, v := range Volumes {
vols = append(vols, fmt.Sprintf("%s_%s:%s:%s", v.Name, drv, v.Mountpoint, v.MountOptions))
}
} else {
for _, n := range names {
v, ok := Volumes[n]
if !ok {
return nil, fmt.Errorf("invalid volume: %s", n)
}
vols = append(vols, fmt.Sprintf("%s_%s:%s:%s", v.Name, drv, v.Mountpoint, v.MountOptions))
}
}
return vols, nil
}
func (r *remoteV10) mesosCLI(resp http.ResponseWriter, req *http.Request) {
const format = "--attributes=gpus:%s --resources=gpus:{%s}"
// Generate Mesos attributes
var b bytes.Buffer
writeGPUInfoJSON(&b)
attr := base64Encode(zlibCompress(b.Bytes()))
// Generate Mesos custom resources
uuids := make([]string, 0, len(Devices))
for i := range Devices {
uuids = append(uuids, Devices[i].UUID)
}
res := strings.Join(uuids, ",")
_, err := fmt.Fprintf(resp, format, attr, res)
assert(err)
}
func zlibCompress(buf []byte) []byte {
b := bytes.NewBuffer(make([]byte, 0, len(buf)))
w := zlib.NewWriter(b)
_, err := w.Write(buf)
assert(err)
err = w.Close()
assert(err)
return b.Bytes()
}
func base64Encode(buf []byte) string {
s := base64.URLEncoding.EncodeToString(buf)
if n := len(buf) % 3; n > 0 {
s = s[:len(s)-(3-n)] // remove padding (RFC 6920)
}
return s
}
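base64Encode above produces base64url without padding; the standalone sketch below shows the same transformation on an arbitrary sample input, noting that base64.RawURLEncoding achieves it directly:

package main

import (
	"encoding/base64"
	"fmt"
)

func main() {
	buf := []byte("gpus:")
	padded := base64.URLEncoding.EncodeToString(buf)      // ends with "="
	unpadded := base64.RawURLEncoding.EncodeToString(buf) // same string without the trailing "="
	fmt.Println(padded, unpadded)
}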

View File

@ -1,125 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"errors"
"fmt"
"log"
"net/url"
"os"
"regexp"
"strings"
"github.com/NVIDIA/nvidia-docker/src/docker"
)
const (
envDockerHost = "DOCKER_HOST"
envNVDocker = "NV_DOCKER"
envNVHost = "NV_HOST"
envNVGPU = "NV_GPU"
)
var ErrInvalidURI = errors.New("invalid remote host URI")
func LoadEnvironment() (err error) {
Host, err = getHost()
if err != nil {
return
}
GPU = getGPU()
cmd := getDocker()
docker.SetCommand(cmd...)
return
}
func parseAddr(addr string) (host, sport, hport string) {
re := regexp.MustCompile(`^(\[[0-9a-f.:]+\]|[0-9A-Za-z.\-_]+)?(:\d+)?:(\d+)?$`)
host, sport, hport = "localhost", "22", "3476"
if addr == "" {
return
}
m := re.FindStringSubmatch(addr)
if m == nil {
return "", "", ""
}
if m[1] != "" {
host = m[1]
}
if m[2] != "" {
sport = m[2][1:]
}
if m[3] != "" {
hport = m[3]
}
return
}
func getHost() (*url.URL, error) {
var env string
nvhost := os.Getenv(envNVHost)
dhost := os.Getenv(envDockerHost)
if nvhost != "" {
env = nvhost
} else if dhost != "" {
env = dhost
} else {
return nil, nil
}
if nvhost != "" && dhost == "" {
log.Printf("Warning: %s is set but %s is not\n", envNVHost, envDockerHost)
}
if ok, _ := regexp.MatchString("^[a-z0-9+.-]+://", env); !ok {
env = "tcp://" + env
}
uri, err := url.Parse(env)
if err != nil {
return nil, ErrInvalidURI
}
if uri.Scheme == "unix" {
return nil, nil
}
host, sport, hport := parseAddr(uri.Host)
if host == "" {
return nil, ErrInvalidURI
}
switch uri.Scheme {
case "tcp":
uri.Scheme = "http"
fallthrough
case "http":
if nvhost == "" && dhost != "" {
hport = "3476"
}
uri.Host = fmt.Sprintf("%s:%s", host, hport)
return uri, nil
case "ssh":
uri.Host = fmt.Sprintf("%s:%s", host, sport)
uri.Opaque = fmt.Sprintf("localhost:%s", hport)
if uri.User == nil {
uri.User = url.UserPassword(os.Getenv("USER"), "")
}
return uri, nil
}
return nil, ErrInvalidURI
}
func getGPU() []string {
return strings.FieldsFunc(os.Getenv(envNVGPU), func(c rune) bool {
return c == ' ' || c == ','
})
}
func getDocker() []string {
return strings.Fields(os.Getenv(envNVDocker))
}
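Illustrative values (assumptions, not from the original documentation): NV_HOST=ssh://user@remote:2222:3476 makes getHost return an SSH URL that dials port 2222 and tunnels to the plugin's port 3476 on the remote machine, while setting only DOCKER_HOST=tcp://remote:2376 yields http://remote:3476, since the plugin port is forced back to 3476 whenever NV_HOST is unset.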

View File

@ -1,87 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"fmt"
"github.com/NVIDIA/nvidia-docker/src/docker"
"github.com/NVIDIA/nvidia-docker/src/nvidia"
)
func GenerateLocalArgs(image string, vols []string) ([]string, error) {
cv, err := nvidia.GetCUDAVersion()
if err != nil {
return nil, err
}
if err := cudaSupported(image, cv); err != nil {
return nil, err
}
d, err := devicesArgs()
if err != nil {
return nil, err
}
v, err := volumesArgs(vols)
if err != nil {
return nil, err
}
return append(d, v...), nil
}
func devicesArgs() ([]string, error) {
var args []string
cdevs, err := nvidia.GetControlDevicePaths()
if err != nil {
return nil, err
}
for i := range cdevs {
args = append(args, fmt.Sprintf("--device=%s", cdevs[i]))
}
devs, err := nvidia.LookupDevices(nvidia.LookupMinimal)
if err != nil {
return nil, err
}
if len(GPU) == 0 {
for i := range devs {
args = append(args, fmt.Sprintf("--device=%s", devs[i].Path))
}
} else {
devs, err := nvidia.FilterDevices(devs, GPU)
if err != nil {
return nil, err
}
for i := range devs {
args = append(args, fmt.Sprintf("--device=%s", devs[i].Path))
}
}
return args, nil
}
func volumesArgs(vols []string) ([]string, error) {
args := make([]string, 0, len(vols))
drv, err := nvidia.GetDriverVersion()
if err != nil {
return nil, err
}
for _, vol := range nvidia.Volumes {
for _, v := range vols {
if v == vol.Name {
// Check whether the volume exists locally, otherwise fall back to using the plugin
n := fmt.Sprintf("%s_%s", vol.Name, drv)
if _, err := docker.VolumeInspect(n); err == nil {
args = append(args, fmt.Sprintf("--volume=%s:%s:%s", n, vol.Mountpoint, vol.MountOptions))
} else {
args = append(args, fmt.Sprintf("--volume-driver=%s", nvidia.DockerPlugin))
args = append(args, fmt.Sprintf("--volume=%s:%s:%s", n, vol.Mountpoint, vol.MountOptions))
}
break
}
}
}
return args, nil
}

View File

@ -1,92 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"fmt"
"log"
"net/url"
"os"
"runtime"
"runtime/debug"
"github.com/NVIDIA/nvidia-docker/src/docker"
"github.com/NVIDIA/nvidia-docker/src/nvidia"
)
var (
Version string
Host *url.URL
GPU []string
)
func init() {
log.SetPrefix(os.Args[0] + " | ")
}
func assert(err error) {
if err != nil {
log.Panicln("Error:", err)
}
}
func exit() {
if err := recover(); err != nil {
if _, ok := err.(runtime.Error); ok {
log.Println(err)
}
if os.Getenv("NV_DEBUG") != "" {
log.Printf("%s", debug.Stack())
}
os.Exit(1)
}
os.Exit(0)
}
func main() {
args := os.Args[1:]
defer exit()
assert(LoadEnvironment())
command, off, err := docker.ParseArgs(args)
assert(err)
if command == "container" && off+1 < len(args) {
command = args[off+1]
off += 1
}
if command != "create" && command != "run" {
if command == "version" {
fmt.Printf("NVIDIA Docker: %s\n\n", Version)
}
assert(docker.Docker(args...))
}
opt, i, err := docker.ParseArgs(args[off+1:], command)
assert(err)
off += i + 1
if (command == "create" || command == "run") && opt != "" {
vols, err := VolumesNeeded(opt)
assert(err)
if vols != nil {
var nargs []string
var err error
if Host != nil {
nargs, err = GenerateRemoteArgs(opt, vols)
} else {
assert(nvidia.LoadUVM())
assert(nvidia.Init())
nargs, err = GenerateLocalArgs(opt, vols)
nvidia.Shutdown()
}
assert(err)
args = append(args[:off], append(nargs, args[off:]...)...)
}
}
assert(docker.Docker(args...))
}

View File

@ -1,105 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net"
"net/http"
"net/url"
"os"
"strings"
"syscall"
"time"
"golang.org/x/crypto/ssh"
"golang.org/x/crypto/ssh/agent"
"golang.org/x/crypto/ssh/terminal"
)
const timeout = 10 * time.Second
const (
endpointInfo = "http://plugin/gpu/info/json"
endpointCLI = "http://plugin/docker/cli"
)
func GenerateRemoteArgs(image string, vols []string) ([]string, error) {
var info struct {
Version struct{ CUDA string }
}
c := httpClient(Host)
r, err := c.Get(endpointInfo)
if err != nil {
return nil, err
}
defer r.Body.Close()
if err := json.NewDecoder(r.Body).Decode(&info); err != nil {
return nil, err
}
if err := cudaSupported(image, info.Version.CUDA); err != nil {
return nil, err
}
uri := fmt.Sprintf("%s?vol=%s&dev=%s", endpointCLI,
strings.Join(vols, "+"),
strings.Join(GPU, "+"),
)
r2, err := c.Get(uri)
if err != nil {
return nil, err
}
defer r2.Body.Close()
b, err := ioutil.ReadAll(r2.Body)
if err != nil {
return nil, err
}
return strings.Split(string(b), " "), nil
}
func httpClient(addr *url.URL) *http.Client {
dial := func(string, string) (net.Conn, error) {
if addr.Scheme == "ssh" {
c, err := ssh.Dial("tcp", addr.Host, &ssh.ClientConfig{
User: addr.User.Username(),
Auth: sshAuths(addr),
})
if err != nil {
return nil, err
}
return c.Dial("tcp", addr.Opaque)
}
return net.Dial("tcp", addr.Host)
}
return &http.Client{
Timeout: timeout,
Transport: &http.Transport{Dial: dial},
}
}
func sshAuths(addr *url.URL) (methods []ssh.AuthMethod) {
if sock := os.Getenv("SSH_AUTH_SOCK"); sock != "" {
c, err := net.Dial("unix", sock)
if err != nil {
log.Println("Warning: failed to contact the local SSH agent")
} else {
auth := ssh.PublicKeysCallback(agent.NewClient(c).Signers)
methods = append(methods, auth)
}
}
auth := ssh.PasswordCallback(func() (string, error) {
fmt.Printf("%s@%s password: ", addr.User.Username(), addr.Host)
b, err := terminal.ReadPassword(int(syscall.Stdin))
fmt.Print("\n")
return string(b), err
})
methods = append(methods, auth)
return
}

View File

@ -1,59 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package main
import (
"fmt"
"strings"
"github.com/NVIDIA/nvidia-docker/src/docker"
)
const (
labelCUDAVersion = "com.nvidia.cuda.version"
labelVolumesNeeded = "com.nvidia.volumes.needed"
)
func VolumesNeeded(image string) ([]string, error) {
ok, err := docker.ImageExists(image)
if err != nil {
return nil, err
}
if !ok {
if err = docker.ImagePull(image); err != nil {
return nil, err
}
}
label, err := docker.Label(image, labelVolumesNeeded)
if err != nil {
return nil, err
}
if label == "" {
return nil, nil
}
return strings.Split(label, " "), nil
}
func cudaSupported(image, version string) error {
var vmaj, vmin int
var lmaj, lmin int
label, err := docker.Label(image, labelCUDAVersion)
if err != nil {
return err
}
if label == "" {
return nil
}
if _, err := fmt.Sscanf(version, "%d.%d", &vmaj, &vmin); err != nil {
return err
}
if _, err := fmt.Sscanf(label, "%d.%d", &lmaj, &lmin); err != nil {
return err
}
if lmaj > vmaj || (lmaj == vmaj && lmin > vmin) {
return fmt.Errorf("unsupported CUDA version: driver %s < image %s", version, label)
}
return nil
}
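For example (hypothetical versions): if the local driver reports CUDA 8.0 and the image carries the label com.nvidia.cuda.version=9.0, cudaSupported fails with "unsupported CUDA version: driver 8.0 < image 9.0"; an image labeled 7.5 or 8.0 is accepted, and an image without the label is always accepted.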

View File

@ -1,130 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package nvidia
import (
"fmt"
"strconv"
"strings"
"github.com/NVIDIA/nvidia-docker/src/cuda"
"github.com/NVIDIA/nvidia-docker/src/nvml"
)
type NVMLDevice nvml.Device
type CUDADevice cuda.Device
type Device struct {
*NVMLDevice
*CUDADevice
}
type NVMLDeviceStatus nvml.DeviceStatus
type DeviceStatus struct {
*NVMLDeviceStatus
}
type LookupStrategy uint
const (
LookupMinimal LookupStrategy = iota
)
func (d *Device) Status() (*DeviceStatus, error) {
s, err := (*nvml.Device)(d.NVMLDevice).Status()
if err != nil {
return nil, err
}
return &DeviceStatus{(*NVMLDeviceStatus)(s)}, nil
}
func LookupDevices(s ...LookupStrategy) (devs []Device, err error) {
var i uint
n, err := nvml.GetDeviceCount()
if err != nil {
return nil, err
}
devs = make([]Device, 0, n)
if n == 0 {
return
}
if len(s) == 1 && s[0] == LookupMinimal {
for i = 0; i < n; i++ {
d, err := nvml.NewDeviceLite(i)
if err != nil {
return nil, err
}
devs = append(devs, Device{(*NVMLDevice)(d), &CUDADevice{}})
}
return
}
for i = 0; i < n; i++ {
nd, err := nvml.NewDevice(i)
if err != nil {
return nil, err
}
cd, err := cuda.NewDevice(nd.PCI.BusID)
if err != nil {
return nil, err
}
devs = append(devs, Device{(*NVMLDevice)(nd), (*CUDADevice)(cd)})
}
for i = 0; i < n-1; i++ {
for j := i + 1; j < n; j++ {
ok, err := cuda.CanAccessPeer(
(*cuda.Device)(devs[i].CUDADevice),
(*cuda.Device)(devs[j].CUDADevice),
)
if err != nil {
return nil, err
}
if ok {
l, err := nvml.GetP2PLink(
(*nvml.Device)(devs[i].NVMLDevice),
(*nvml.Device)(devs[j].NVMLDevice),
)
if err != nil {
return nil, err
}
devs[i].Topology = append(devs[i].Topology, nvml.P2PLink{devs[j].PCI.BusID, l})
devs[j].Topology = append(devs[j].Topology, nvml.P2PLink{devs[i].PCI.BusID, l})
}
}
}
return
}
func FilterDevices(devs []Device, ids []string) ([]Device, error) {
type void struct{}
set := make(map[int]void)
loop:
for _, id := range ids {
if strings.HasPrefix(id, "GPU-") {
for i := range devs {
if strings.HasPrefix(devs[i].UUID, id) {
set[i] = void{}
continue loop
}
}
} else {
i, err := strconv.Atoi(id)
if err == nil && i >= 0 && i < len(devs) {
set[i] = void{}
continue loop
}
}
return nil, fmt.Errorf("invalid device: %s", id)
}
d := make([]Device, 0, len(set))
for i := range set {
d = append(d, devs[i])
}
return d, nil
}
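For instance, FilterDevices(devs, []string{"0", "GPU-7ba0"}) (a hypothetical UUID prefix) selects the device with index 0 plus every device whose UUID starts with "GPU-7ba0"; since matches are collected in a map, the order of the returned slice is not guaranteed to follow the order of the ids argument.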

View File

@ -1,61 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package nvidia
import (
"errors"
"os"
"os/exec"
"github.com/NVIDIA/nvidia-docker/src/cuda"
"github.com/NVIDIA/nvidia-docker/src/nvml"
)
const (
DockerPlugin = "nvidia-docker"
DeviceCtl = "/dev/nvidiactl"
DeviceUVM = "/dev/nvidia-uvm"
DeviceUVMTools = "/dev/nvidia-uvm-tools"
)
func Init() error {
if err := os.Setenv("CUDA_DISABLE_UNIFIED_MEMORY", "1"); err != nil {
return err
}
if err := os.Setenv("CUDA_CACHE_DISABLE", "1"); err != nil {
return err
}
if err := os.Unsetenv("CUDA_VISIBLE_DEVICES"); err != nil {
return err
}
return nvml.Init()
}
func Shutdown() error {
return nvml.Shutdown()
}
func LoadUVM() error {
if exec.Command("nvidia-modprobe", "-u", "-c=0").Run() != nil {
return errors.New("Could not load UVM kernel module. Is nvidia-modprobe installed?")
}
return nil
}
func GetDriverVersion() (string, error) {
return nvml.GetDriverVersion()
}
func GetCUDAVersion() (string, error) {
return cuda.GetDriverVersion()
}
func GetControlDevicePaths() ([]string, error) {
devs := []string{DeviceCtl, DeviceUVM}
_, err := os.Stat(DeviceUVMTools)
if os.IsNotExist(err) {
return devs, nil
}
return append(devs, DeviceUVMTools), err
}

View File

@ -1,388 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package nvidia
import (
"bufio"
"bytes"
"debug/elf"
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"regexp"
"strings"
"github.com/NVIDIA/nvidia-docker/src/ldcache"
)
const (
binDir = "bin"
lib32Dir = "lib"
lib64Dir = "lib64"
)
type components map[string][]string
type volumeDir struct {
name string
files []string
}
type VolumeInfo struct {
Name string
Mountpoint string
MountOptions string
Components components
}
type Volume struct {
*VolumeInfo
Path string
Version string
dirs []volumeDir
}
type VolumeMap map[string]*Volume
type FileCloneStrategy interface {
Clone(src, dst string) error
}
type LinkStrategy struct{}
func (s LinkStrategy) Clone(src, dst string) error {
return os.Link(src, dst)
}
type LinkOrCopyStrategy struct{}
func (s LinkOrCopyStrategy) Clone(src, dst string) error {
// Prefer a hard link, fall back to a copy
err := os.Link(src, dst)
if err != nil {
err = Copy(src, dst)
}
return err
}
func Copy(src, dst string) error {
s, err := os.Open(src)
if err != nil {
return err
}
defer s.Close()
fi, err := s.Stat()
if err != nil {
return err
}
d, err := os.Create(dst)
if err != nil {
return err
}
if _, err := io.Copy(d, s); err != nil {
d.Close()
return err
}
if err := d.Chmod(fi.Mode()); err != nil {
d.Close()
return err
}
return d.Close()
}
var Volumes = []VolumeInfo{
{
"nvidia_driver",
"/usr/local/nvidia",
"ro",
components{
"binaries": {
//"nvidia-modprobe", // Kernel module loader
//"nvidia-settings", // X server settings
//"nvidia-xconfig", // X xorg.conf editor
"nvidia-cuda-mps-control", // Multi process service CLI
"nvidia-cuda-mps-server", // Multi process service server
"nvidia-debugdump", // GPU coredump utility
"nvidia-persistenced", // Persistence mode utility
"nvidia-smi", // System management interface
},
"libraries": {
// ------- X11 -------
//"libnvidia-cfg.so", // GPU configuration (used by nvidia-xconfig)
//"libnvidia-gtk2.so", // GTK2 (used by nvidia-settings)
//"libnvidia-gtk3.so", // GTK3 (used by nvidia-settings)
//"libnvidia-wfb.so", // Wrapped software rendering module for X server
//"libglx.so", // GLX extension module for X server
// ----- Compute -----
"libnvidia-ml.so", // Management library
"libcuda.so", // CUDA driver library
"libnvidia-ptxjitcompiler.so", // PTX-SASS JIT compiler (used by libcuda)
"libnvidia-fatbinaryloader.so", // fatbin loader (used by libcuda)
"libnvidia-opencl.so", // NVIDIA OpenCL ICD
"libnvidia-compiler.so", // NVVM-PTX compiler for OpenCL (used by libnvidia-opencl)
//"libOpenCL.so", // OpenCL ICD loader
// ------ Video ------
"libvdpau_nvidia.so", // NVIDIA VDPAU ICD
"libnvidia-encode.so", // Video encoder
"libnvcuvid.so", // Video decoder
"libnvidia-fbc.so", // Framebuffer capture
"libnvidia-ifr.so", // OpenGL framebuffer capture
// ----- Graphic -----
// XXX In an ideal world we would only mount nvidia_* vendor specific libraries and
// install ICD loaders inside the container. However, for backward compatibility reasons
// we need to mount everything. This will hopefully change once GLVND is well established.
"libGL.so", // OpenGL/GLX legacy _or_ compatibility wrapper (GLVND)
"libGLX.so", // GLX ICD loader (GLVND)
"libOpenGL.so", // OpenGL ICD loader (GLVND)
"libGLESv1_CM.so", // OpenGL ES v1 common profile legacy _or_ ICD loader (GLVND)
"libGLESv2.so", // OpenGL ES v2 legacy _or_ ICD loader (GLVND)
"libEGL.so", // EGL ICD loader
"libGLdispatch.so", // OpenGL dispatch (GLVND) (used by libOpenGL, libEGL and libGLES*)
"libGLX_nvidia.so", // OpenGL/GLX ICD (GLVND)
"libEGL_nvidia.so", // EGL ICD (GLVND)
"libGLESv2_nvidia.so", // OpenGL ES v2 ICD (GLVND)
"libGLESv1_CM_nvidia.so", // OpenGL ES v1 common profile ICD (GLVND)
"libnvidia-eglcore.so", // EGL core (used by libGLES* or libGLES*_nvidia and libEGL_nvidia)
"libnvidia-egl-wayland.so", // EGL wayland extensions (used by libEGL_nvidia)
"libnvidia-glcore.so", // OpenGL core (used by libGL or libGLX_nvidia)
"libnvidia-tls.so", // Thread local storage (used by libGL or libGLX_nvidia)
"libnvidia-glsi.so", // OpenGL system interaction (used by libEGL_nvidia)
},
},
},
}
func blacklisted(file string, obj *elf.File) (bool, error) {
lib := regexp.MustCompile(`^.*/lib([\w-]+)\.so[\d.]*$`)
glcore := regexp.MustCompile(`libnvidia-e?glcore\.so`)
gldispatch := regexp.MustCompile(`libGLdispatch\.so`)
if m := lib.FindStringSubmatch(file); m != nil {
switch m[1] {
// Blacklist EGL/OpenGL libraries issued by other vendors
case "EGL":
fallthrough
case "GLESv1_CM":
fallthrough
case "GLESv2":
fallthrough
case "GL":
deps, err := obj.DynString(elf.DT_NEEDED)
if err != nil {
return false, err
}
for _, d := range deps {
if glcore.MatchString(d) || gldispatch.MatchString(d) {
return false, nil
}
}
return true, nil
// Blacklist TLS libraries using the old ABI (!= 2.3.99)
case "nvidia-tls":
const abi = 0x6300000003
s, err := obj.Section(".note.ABI-tag").Data()
if err != nil {
return false, err
}
return binary.LittleEndian.Uint64(s[24:]) != abi, nil
}
}
return false, nil
}
func (v *Volume) Create(s FileCloneStrategy) (err error) {
root := path.Join(v.Path, v.Version)
if err = os.MkdirAll(root, 0755); err != nil {
return
}
defer func() {
if err != nil {
v.Remove()
}
}()
for _, d := range v.dirs {
vpath := path.Join(root, d.name)
if err := os.MkdirAll(vpath, 0755); err != nil {
return err
}
// For each file matching the volume components (blacklist excluded), create a hardlink/copy
// of it inside the volume directory. We also need to create soname symlinks similar to what
// ldconfig does since our volume will only show up at runtime.
for _, f := range d.files {
obj, err := elf.Open(f)
if err != nil {
return fmt.Errorf("%s: %v", f, err)
}
defer obj.Close()
ok, err := blacklisted(f, obj)
if err != nil {
return fmt.Errorf("%s: %v", f, err)
}
if ok {
continue
}
l := path.Join(vpath, path.Base(f))
if err := s.Clone(f, l); err != nil {
return err
}
soname, err := obj.DynString(elf.DT_SONAME)
if err != nil {
return fmt.Errorf("%s: %v", f, err)
}
if len(soname) > 0 {
l = path.Join(vpath, soname[0])
if err := os.Symlink(path.Base(f), l); err != nil && !os.IsExist(err) {
return err
}
// XXX Many applications (wrongly) assume that libcuda.so exists (e.g. with dlopen)
// Hardcode the libcuda symlink for the time being.
if strings.HasPrefix(soname[0], "libcuda") {
l = strings.TrimRight(l, ".0123456789")
if err := os.Symlink(path.Base(f), l); err != nil && !os.IsExist(err) {
return err
}
}
// XXX GLVND requires this symlink for indirect GLX support
// It won't be needed once we have an indirect GLX vendor neutral library.
if strings.HasPrefix(soname[0], "libGLX_nvidia") {
l = strings.Replace(l, "GLX_nvidia", "GLX_indirect", 1)
if err := os.Symlink(path.Base(f), l); err != nil && !os.IsExist(err) {
return err
}
}
}
}
}
return nil
}
func (v *Volume) Remove(version ...string) error {
vv := v.Version
if len(version) == 1 {
vv = version[0]
}
return os.RemoveAll(path.Join(v.Path, vv))
}
func (v *Volume) Exists(version ...string) (bool, error) {
vv := v.Version
if len(version) == 1 {
vv = version[0]
}
_, err := os.Stat(path.Join(v.Path, vv))
if os.IsNotExist(err) {
return false, nil
}
return true, err
}
func (v *Volume) ListVersions() ([]string, error) {
dirs, err := ioutil.ReadDir(v.Path)
if os.IsNotExist(err) {
return nil, nil
}
if err != nil {
return nil, err
}
versions := make([]string, len(dirs))
for i := range dirs {
versions[i] = dirs[i].Name()
}
return versions, nil
}
func which(bins ...string) ([]string, error) {
paths := make([]string, 0, len(bins))
out, _ := exec.Command("which", bins...).Output()
r := bufio.NewReader(bytes.NewBuffer(out))
for {
p, err := r.ReadString('\n')
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
if p = strings.TrimSpace(p); !path.IsAbs(p) {
continue
}
resolved, err := filepath.EvalSymlinks(p) // resolve to the real binary path (avoids shadowing the path package)
if err != nil {
return nil, err
}
paths = append(paths, resolved)
}
return paths, nil
}
func LookupVolumes(prefix string) (vols VolumeMap, err error) {
drv, err := GetDriverVersion()
if err != nil {
return nil, err
}
cache, err := ldcache.Open()
if err != nil {
return nil, err
}
defer func() {
if e := cache.Close(); err == nil {
err = e
}
}()
vols = make(VolumeMap, len(Volumes))
for i := range Volumes {
vol := &Volume{
VolumeInfo: &Volumes[i],
Path: path.Join(prefix, Volumes[i].Name),
Version: drv,
}
for t, c := range vol.Components {
switch t {
case "binaries":
bins, err := which(c...)
if err != nil {
return nil, err
}
vol.dirs = append(vol.dirs, volumeDir{binDir, bins})
case "libraries":
libs32, libs64 := cache.Lookup(c...)
vol.dirs = append(vol.dirs,
volumeDir{lib32Dir, libs32},
volumeDir{lib64Dir, libs64},
)
}
}
vols[vol.Name] = vol
}
return
}
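As a concrete illustration of the symlink handling in Create above (driver version 367.57 is only an example): cloning libcuda.so.367.57, whose SONAME is libcuda.so.1, produces libcuda.so.1 -> libcuda.so.367.57 plus the hardcoded libcuda.so -> libcuda.so.367.57 link, and cloning the libGLX_nvidia driver library additionally yields a libGLX_indirect symlink pointing at the same file.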

View File

@ -1,311 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package nvml
// #cgo LDFLAGS: -ldl -Wl,--unresolved-symbols=ignore-in-object-files
// #include "nvml_dl.h"
import "C"
import (
"errors"
"fmt"
)
const (
szDriver = C.NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE
szName = C.NVML_DEVICE_NAME_BUFFER_SIZE
szUUID = C.NVML_DEVICE_UUID_BUFFER_SIZE
szProcs = 32
szProcName = 64
)
type handle struct{ dev C.nvmlDevice_t }
func uintPtr(c C.uint) *uint {
i := uint(c)
return &i
}
func uint64Ptr(c C.ulonglong) *uint64 {
i := uint64(c)
return &i
}
func stringPtr(c *C.char) *string {
s := C.GoString(c)
return &s
}
func errorString(ret C.nvmlReturn_t) error {
if ret == C.NVML_SUCCESS {
return nil
}
err := C.GoString(C.nvmlErrorString(ret))
return fmt.Errorf("nvml: %v", err)
}
func init_() error {
r := C.nvmlInit_dl()
if r == C.NVML_ERROR_LIBRARY_NOT_FOUND {
return errors.New("could not load NVML library")
}
return errorString(r)
}
func shutdown() error {
return errorString(C.nvmlShutdown_dl())
}
func systemGetDriverVersion() (string, error) {
var driver [szDriver]C.char
r := C.nvmlSystemGetDriverVersion(&driver[0], szDriver)
return C.GoString(&driver[0]), errorString(r)
}
func systemGetProcessName(pid uint) (string, error) {
var proc [szProcName]C.char
r := C.nvmlSystemGetProcessName(C.uint(pid), &proc[0], szProcName)
return C.GoString(&proc[0]), errorString(r)
}
func deviceGetCount() (uint, error) {
var n C.uint
r := C.nvmlDeviceGetCount(&n)
return uint(n), errorString(r)
}
func deviceGetHandleByIndex(idx uint) (handle, error) {
var dev C.nvmlDevice_t
r := C.nvmlDeviceGetHandleByIndex(C.uint(idx), &dev)
return handle{dev}, errorString(r)
}
func deviceGetTopologyCommonAncestor(h1, h2 handle) (*uint, error) {
var level C.nvmlGpuTopologyLevel_t
r := C.nvmlDeviceGetTopologyCommonAncestor_dl(h1.dev, h2.dev, &level)
if r == C.NVML_ERROR_FUNCTION_NOT_FOUND || r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(C.uint(level)), errorString(r)
}
func (h handle) deviceGetName() (*string, error) {
var name [szName]C.char
r := C.nvmlDeviceGetName(h.dev, &name[0], szName)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return stringPtr(&name[0]), errorString(r)
}
func (h handle) deviceGetUUID() (*string, error) {
var uuid [szUUID]C.char
r := C.nvmlDeviceGetUUID(h.dev, &uuid[0], szUUID)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return stringPtr(&uuid[0]), errorString(r)
}
func (h handle) deviceGetPciInfo() (*string, error) {
var pci C.nvmlPciInfo_t
r := C.nvmlDeviceGetPciInfo(h.dev, &pci)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return stringPtr(&pci.busId[0]), errorString(r)
}
func (h handle) deviceGetMinorNumber() (*uint, error) {
var minor C.uint
r := C.nvmlDeviceGetMinorNumber(h.dev, &minor)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(minor), errorString(r)
}
func (h handle) deviceGetBAR1MemoryInfo() (*uint64, *uint64, error) {
var bar1 C.nvmlBAR1Memory_t
r := C.nvmlDeviceGetBAR1MemoryInfo(h.dev, &bar1)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
return uint64Ptr(bar1.bar1Total), uint64Ptr(bar1.bar1Used), errorString(r)
}
func (h handle) deviceGetPowerManagementLimit() (*uint, error) {
var power C.uint
r := C.nvmlDeviceGetPowerManagementLimit(h.dev, &power)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(power), errorString(r)
}
func (h handle) deviceGetMaxClockInfo() (*uint, *uint, error) {
var sm, mem C.uint
r := C.nvmlDeviceGetMaxClockInfo(h.dev, C.NVML_CLOCK_SM, &sm)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetMaxClockInfo(h.dev, C.NVML_CLOCK_MEM, &mem)
}
return uintPtr(sm), uintPtr(mem), errorString(r)
}
func (h handle) deviceGetMaxPcieLinkGeneration() (*uint, error) {
var link C.uint
r := C.nvmlDeviceGetMaxPcieLinkGeneration(h.dev, &link)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(link), errorString(r)
}
func (h handle) deviceGetMaxPcieLinkWidth() (*uint, error) {
var width C.uint
r := C.nvmlDeviceGetMaxPcieLinkWidth(h.dev, &width)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(width), errorString(r)
}
func (h handle) deviceGetPowerUsage() (*uint, error) {
var power C.uint
r := C.nvmlDeviceGetPowerUsage(h.dev, &power)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(power), errorString(r)
}
func (h handle) deviceGetTemperature() (*uint, error) {
var temp C.uint
r := C.nvmlDeviceGetTemperature(h.dev, C.NVML_TEMPERATURE_GPU, &temp)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(temp), errorString(r)
}
func (h handle) deviceGetUtilizationRates() (*uint, *uint, error) {
var usage C.nvmlUtilization_t
r := C.nvmlDeviceGetUtilizationRates(h.dev, &usage)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
return uintPtr(usage.gpu), uintPtr(usage.memory), errorString(r)
}
func (h handle) deviceGetEncoderUtilization() (*uint, error) {
var usage, sampling C.uint
r := C.nvmlDeviceGetEncoderUtilization(h.dev, &usage, &sampling)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(usage), errorString(r)
}
func (h handle) deviceGetDecoderUtilization() (*uint, error) {
var usage, sampling C.uint
r := C.nvmlDeviceGetDecoderUtilization(h.dev, &usage, &sampling)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uintPtr(usage), errorString(r)
}
func (h handle) deviceGetMemoryInfo() (*uint64, error) {
var mem C.nvmlMemory_t
r := C.nvmlDeviceGetMemoryInfo(h.dev, &mem)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil
}
return uint64Ptr(mem.used), errorString(r)
}
func (h handle) deviceGetClockInfo() (*uint, *uint, error) {
var sm, mem C.uint
r := C.nvmlDeviceGetClockInfo(h.dev, C.NVML_CLOCK_SM, &sm)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetClockInfo(h.dev, C.NVML_CLOCK_MEM, &mem)
}
return uintPtr(sm), uintPtr(mem), errorString(r)
}
func (h handle) deviceGetMemoryErrorCounter() (*uint64, *uint64, *uint64, error) {
var l1, l2, mem C.ulonglong
r := C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_L1_CACHE, &l1)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil, nil
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_L2_CACHE, &l2)
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_DEVICE_MEMORY, &mem)
}
return uint64Ptr(l1), uint64Ptr(l2), uint64Ptr(mem), errorString(r)
}
func (h handle) deviceGetPcieThroughput() (*uint, *uint, error) {
var rx, tx C.uint
r := C.nvmlDeviceGetPcieThroughput(h.dev, C.NVML_PCIE_UTIL_RX_BYTES, &rx)
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
if r == C.NVML_SUCCESS {
r = C.nvmlDeviceGetPcieThroughput(h.dev, C.NVML_PCIE_UTIL_TX_BYTES, &tx)
}
return uintPtr(rx), uintPtr(tx), errorString(r)
}
// deviceGetComputeRunningProcesses fills a fixed-size scratch buffer; on return,
// count holds the number of entries NVML actually wrote.
func (h handle) deviceGetComputeRunningProcesses() ([]uint, []uint64, error) {
var procs [szProcs]C.nvmlProcessInfo_t
var count = C.uint(szProcs)
r := C.nvmlDeviceGetComputeRunningProcesses(h.dev, &count, &procs[0])
if r == C.NVML_ERROR_NOT_SUPPORTED {
return nil, nil, nil
}
n := int(count)
pids := make([]uint, n)
mems := make([]uint64, n)
for i := 0; i < n; i++ {
pids[i] = uint(procs[i].pid)
mems[i] = uint64(procs[i].usedGpuMemory)
}
return pids, mems, errorString(r)
}
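Every wrapper above follows the same shape: call the NVML entry point, translate NVML_ERROR_NOT_SUPPORTED into a nil result so callers can tell "not reported" apart from a real failure, and pass any other status through errorString. A minimal sketch of how the package itself could chain these private helpers (hypothetical code, not part of the original file, assuming init_() has already succeeded):

// enumerateNames is a hypothetical helper illustrating how the low-level wrappers
// compose: count the devices, take a handle per index, and read each name.
func enumerateNames() ([]string, error) {
	n, err := deviceGetCount()
	if err != nil {
		return nil, err
	}
	names := make([]string, 0, n)
	for i := uint(0); i < n; i++ {
		h, err := deviceGetHandleByIndex(i)
		if err != nil {
			return nil, err
		}
		name, err := h.deviceGetName()
		if err != nil {
			return nil, err
		}
		if name == nil {
			// NVML_ERROR_NOT_SUPPORTED surfaces as a nil pointer, not an error.
			names = append(names, "N/A")
			continue
		}
		names = append(names, *name)
	}
	return names, nil
}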

View File

@ -1,381 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
package nvml
// #include "nvml_dl.h"
import "C"
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"strconv"
"strings"
)
var (
ErrCPUAffinity = errors.New("failed to retrieve CPU affinity")
ErrUnsupportedP2PLink = errors.New("unsupported P2P link type")
ErrUnsupportedGPU = errors.New("unsupported GPU device")
)
type P2PLinkType uint
const (
P2PLinkUnknown P2PLinkType = iota
P2PLinkCrossCPU
P2PLinkSameCPU
P2PLinkHostBridge
P2PLinkMultiSwitch
P2PLinkSingleSwitch
P2PLinkSameBoard
)
type P2PLink struct {
BusID string
Link P2PLinkType
}
func (t P2PLinkType) String() string {
switch t {
case P2PLinkCrossCPU:
return "Cross CPU socket"
case P2PLinkSameCPU:
return "Same CPU socket"
case P2PLinkHostBridge:
return "Host PCI bridge"
case P2PLinkMultiSwitch:
return "Multiple PCI switches"
case P2PLinkSingleSwitch:
return "Single PCI switch"
case P2PLinkSameBoard:
return "Same board"
case P2PLinkUnknown:
}
return "N/A"
}
type ClockInfo struct {
Cores *uint
Memory *uint
}
type PCIInfo struct {
BusID string
BAR1 *uint64
Bandwidth *uint
}
type Device struct {
handle
UUID string
Path string
Model *string
Power *uint
CPUAffinity *uint
PCI PCIInfo
Clocks ClockInfo
Topology []P2PLink
}
type UtilizationInfo struct {
GPU *uint
Memory *uint
Encoder *uint
Decoder *uint
}
type PCIThroughputInfo struct {
RX *uint
TX *uint
}
type PCIStatusInfo struct {
BAR1Used *uint64
Throughput PCIThroughputInfo
}
type ECCErrorsInfo struct {
L1Cache *uint64
L2Cache *uint64
Global *uint64
}
type MemoryInfo struct {
GlobalUsed *uint64
ECCErrors ECCErrorsInfo
}
type ProcessInfo struct {
PID uint
Name string
MemoryUsed uint64
}
type DeviceStatus struct {
Power *uint
Temperature *uint
Utilization UtilizationInfo
Memory MemoryInfo
Clocks ClockInfo
PCI PCIStatusInfo
Processes []ProcessInfo
}
// assert turns an error into a panic; NewDevice, NewDeviceLite and Status recover
// it and hand it back to the caller as a regular error.
func assert(err error) {
if err != nil {
panic(err)
}
}
func Init() error {
return init_()
}
func Shutdown() error {
return shutdown()
}
func GetDeviceCount() (uint, error) {
return deviceGetCount()
}
func GetDriverVersion() (string, error) {
return systemGetDriverVersion()
}
// numaNode looks up the NUMA node of a PCI device through sysfs.
func numaNode(busid string) (uint, error) {
b, err := ioutil.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", strings.ToLower(busid)))
if err != nil {
// XXX report node 0 if NUMA support isn't enabled
return 0, nil
}
node, err := strconv.ParseInt(string(bytes.TrimSpace(b)), 10, 8)
if err != nil {
return 0, fmt.Errorf("%v: %v", ErrCPUAffinity, err)
}
if node < 0 {
node = 0 // XXX report node 0 instead of NUMA_NO_NODE
}
return uint(node), nil
}
// pciBandwidth computes the theoretical PCIe bandwidth in MB/s from the per-lane
// rate of the link generation multiplied by the link width.
func pciBandwidth(gen, width *uint) *uint {
m := map[uint]uint{
1: 250, // MB/s
2: 500,
3: 985,
4: 1969,
}
if gen == nil || width == nil {
return nil
}
bw := m[*gen] * *width
return &bw
}
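To make the table above concrete: PCIe Gen3 carries roughly 985 MB/s per lane, so a Gen3 x16 device works out to 985 * 16 = 15760 MB/s, and a nil generation or width simply propagates as a nil bandwidth. A hypothetical helper (not in the original file) showing the call:

// examplePCIBandwidth demonstrates the arithmetic for a Gen3 x16 link.
func examplePCIBandwidth() *uint {
	gen, width := uint(3), uint(16)
	return pciBandwidth(&gen, &width) // *result == 15760 (MB/s)
}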
// NewDevice gathers the static properties of the GPU at index idx and returns
// ErrUnsupportedGPU when the minor number, bus ID or UUID cannot be queried.
func NewDevice(idx uint) (device *Device, err error) {
defer func() {
if r := recover(); r != nil {
err = r.(error)
}
}()
h, err := deviceGetHandleByIndex(idx)
assert(err)
model, err := h.deviceGetName()
assert(err)
uuid, err := h.deviceGetUUID()
assert(err)
minor, err := h.deviceGetMinorNumber()
assert(err)
power, err := h.deviceGetPowerManagementLimit()
assert(err)
busid, err := h.deviceGetPciInfo()
assert(err)
bar1, _, err := h.deviceGetBAR1MemoryInfo()
assert(err)
pcig, err := h.deviceGetMaxPcieLinkGeneration()
assert(err)
pciw, err := h.deviceGetMaxPcieLinkWidth()
assert(err)
ccore, cmem, err := h.deviceGetMaxClockInfo()
assert(err)
if minor == nil || busid == nil || uuid == nil {
return nil, ErrUnsupportedGPU
}
path := fmt.Sprintf("/dev/nvidia%d", *minor)
node, err := numaNode(*busid)
assert(err)
device = &Device{
handle: h,
UUID: *uuid,
Path: path,
Model: model,
Power: power,
CPUAffinity: &node,
PCI: PCIInfo{
BusID: *busid,
BAR1: bar1,
Bandwidth: pciBandwidth(pcig, pciw), // MB/s
},
Clocks: ClockInfo{
Cores: ccore, // MHz
Memory: cmem, // MHz
},
}
if power != nil {
*device.Power /= 1000 // W
}
if bar1 != nil {
*device.PCI.BAR1 /= 1024 * 1024 // MiB
}
return
}
// NewDeviceLite is a lighter variant of NewDevice that only fills UUID, Path and BusID.
func NewDeviceLite(idx uint) (device *Device, err error) {
defer func() {
if r := recover(); r != nil {
err = r.(error)
}
}()
h, err := deviceGetHandleByIndex(idx)
assert(err)
uuid, err := h.deviceGetUUID()
assert(err)
minor, err := h.deviceGetMinorNumber()
assert(err)
busid, err := h.deviceGetPciInfo()
assert(err)
if minor == nil || busid == nil || uuid == nil {
return nil, ErrUnsupportedGPU
}
path := fmt.Sprintf("/dev/nvidia%d", *minor)
device = &Device{
handle: h,
UUID: *uuid,
Path: path,
PCI: PCIInfo{
BusID: *busid,
},
}
return
}
// Status samples the dynamic state of the device: power draw, temperature,
// utilization, memory usage, ECC counters, clocks, PCIe throughput and the
// compute processes currently running on it.
func (d *Device) Status() (status *DeviceStatus, err error) {
defer func() {
if r := recover(); r != nil {
err = r.(error)
}
}()
power, err := d.deviceGetPowerUsage()
assert(err)
temp, err := d.deviceGetTemperature()
assert(err)
ugpu, umem, err := d.deviceGetUtilizationRates()
assert(err)
uenc, err := d.deviceGetEncoderUtilization()
assert(err)
udec, err := d.deviceGetDecoderUtilization()
assert(err)
mem, err := d.deviceGetMemoryInfo()
assert(err)
ccore, cmem, err := d.deviceGetClockInfo()
assert(err)
_, bar1, err := d.deviceGetBAR1MemoryInfo()
assert(err)
pids, pmems, err := d.deviceGetComputeRunningProcesses()
assert(err)
el1, el2, emem, err := d.deviceGetMemoryErrorCounter()
assert(err)
pcirx, pcitx, err := d.deviceGetPcieThroughput()
assert(err)
status = &DeviceStatus{
Power: power,
Temperature: temp, // °C
Utilization: UtilizationInfo{
GPU: ugpu, // %
Memory: umem, // %
Encoder: uenc, // %
Decoder: udec, // %
},
Memory: MemoryInfo{
GlobalUsed: mem,
ECCErrors: ECCErrorsInfo{
L1Cache: el1,
L2Cache: el2,
Global: emem,
},
},
Clocks: ClockInfo{
Cores: ccore, // MHz
Memory: cmem, // MHz
},
PCI: PCIStatusInfo{
BAR1Used: bar1,
Throughput: PCIThroughputInfo{
RX: pcirx,
TX: pcitx,
},
},
}
if power != nil {
*status.Power /= 1000 // W
}
if mem != nil {
*status.Memory.GlobalUsed /= 1024 * 1024 // MiB
}
if bar1 != nil {
*status.PCI.BAR1Used /= 1024 * 1024 // MiB
}
if pcirx != nil {
*status.PCI.Throughput.RX /= 1000 // MB/s
}
if pcitx != nil {
*status.PCI.Throughput.TX /= 1000 // MB/s
}
for i := range pids {
name, err := systemGetProcessName(pids[i])
assert(err)
status.Processes = append(status.Processes, ProcessInfo{
PID: pids[i],
Name: name,
MemoryUsed: pmems[i] / (1024 * 1024), // MiB
})
}
return
}
// GetP2PLink maps the NVML topology level between two devices to a P2PLinkType;
// a level this package does not know about yields ErrUnsupportedP2PLink.
func GetP2PLink(dev1, dev2 *Device) (link P2PLinkType, err error) {
level, err := deviceGetTopologyCommonAncestor(dev1.handle, dev2.handle)
if err != nil || level == nil {
return P2PLinkUnknown, err
}
switch *level {
case C.NVML_TOPOLOGY_INTERNAL:
link = P2PLinkSameBoard
case C.NVML_TOPOLOGY_SINGLE:
link = P2PLinkSingleSwitch
case C.NVML_TOPOLOGY_MULTIPLE:
link = P2PLinkMultiSwitch
case C.NVML_TOPOLOGY_HOSTBRIDGE:
link = P2PLinkHostBridge
case C.NVML_TOPOLOGY_CPU:
link = P2PLinkSameCPU
case C.NVML_TOPOLOGY_SYSTEM:
link = P2PLinkCrossCPU
default:
err = ErrUnsupportedP2PLink
}
return
}
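This file is the public surface of the package: Init and Shutdown bracket the NVML session, NewDevice and NewDeviceLite build Device values, Status samples live metrics, and GetP2PLink classifies the topology between two devices. A hypothetical consumer, sketched under the assumption that the package is importable as "nvml" (the import path is not fixed by this file):

package main

import (
	"fmt"
	"log"

	"nvml" // assumed import path; adjust to wherever this package lives
)

func main() {
	if err := nvml.Init(); err != nil {
		log.Fatal(err)
	}
	defer nvml.Shutdown()

	count, err := nvml.GetDeviceCount()
	if err != nil {
		log.Fatal(err)
	}
	for i := uint(0); i < count; i++ {
		dev, err := nvml.NewDevice(i)
		if err != nil {
			log.Fatal(err)
		}
		st, err := dev.Status()
		if err != nil {
			log.Fatal(err)
		}
		// Optional metrics are pointers; nil means the GPU or driver does not report them.
		fmt.Printf("GPU %d (%s)", i, dev.UUID)
		if st.Temperature != nil {
			fmt.Printf(" %d°C", *st.Temperature)
		}
		if st.Power != nil {
			fmt.Printf(" %dW", *st.Power)
		}
		fmt.Println()
	}
}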

View File

@ -1,46 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
#include <stddef.h>
#include <dlfcn.h>
#include "nvml_dl.h"
/*
 * DLSYM resolves an NVML symbol at runtime from the handle opened below and
 * fails with NVML_ERROR_FUNCTION_NOT_FOUND when the loaded libnvidia-ml.so.1
 * does not export it (e.g. on older drivers).
 */
#define DLSYM(x, sym) \
do { \
dlerror(); \
x = dlsym(handle, #sym); \
if (dlerror() != NULL) { \
return (NVML_ERROR_FUNCTION_NOT_FOUND); \
} \
} while (0)
typedef nvmlReturn_t (*nvmlSym_t)();
static void *handle;
nvmlReturn_t NVML_DL(nvmlInit)(void)
{
handle = dlopen("libnvidia-ml.so.1", RTLD_LAZY | RTLD_GLOBAL);
if (handle == NULL) {
return (NVML_ERROR_LIBRARY_NOT_FOUND);
}
return (nvmlInit());
}
nvmlReturn_t NVML_DL(nvmlShutdown)(void)
{
nvmlReturn_t r = nvmlShutdown();
if (r != NVML_SUCCESS) {
return (r);
}
return (dlclose(handle) ? NVML_ERROR_UNKNOWN : NVML_SUCCESS);
}
nvmlReturn_t NVML_DL(nvmlDeviceGetTopologyCommonAncestor)(
nvmlDevice_t dev1, nvmlDevice_t dev2, nvmlGpuTopologyLevel_t *info)
{
nvmlSym_t sym;
DLSYM(sym, nvmlDeviceGetTopologyCommonAncestor);
return ((*sym)(dev1, dev2, info));
}

View File

@ -1,15 +0,0 @@
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
#ifndef _NVML_DL_H_
#define _NVML_DL_H_
#include <nvml.h>
#define NVML_DL(x) x##_dl
extern nvmlReturn_t NVML_DL(nvmlInit)(void);
extern nvmlReturn_t NVML_DL(nvmlShutdown)(void);
extern nvmlReturn_t NVML_DL(nvmlDeviceGetTopologyCommonAncestor)(
nvmlDevice_t, nvmlDevice_t, nvmlGpuTopologyLevel_t *);
#endif // _NVML_DL_H_