Merge branch '2.0'
Signed-off-by: Felix Abecassis <fabecassis@nvidia.com>
This commit is contained in:
commit
fe1874942b
|
@ -6,6 +6,4 @@
|
|||
.\#*
|
||||
*~
|
||||
\#*
|
||||
bin
|
||||
dist
|
||||
samples
|
||||
|
|
160
CLA
160
CLA
|
@ -1,160 +0,0 @@
|
|||
The NVIDIA Docker
|
||||
Software Grant and Corporate Contributor License Agreement ("Agreement")
|
||||
|
||||
Thank you for your interest in the NVIDIA Docker Project (the
|
||||
"Project"). In order to clarify the intellectual property license
|
||||
granted with Contributions from any person or entity, NVIDIA
|
||||
Corporation (the “Copyright Holders") must have a Contributor License
|
||||
Agreement (CLA) on file that has been signed by each Contributor,
|
||||
indicating agreement to the license terms below. This license is
|
||||
for your protection as a Contributor as well as the protection of the
|
||||
Project and its users; it does not change your rights to use your own
|
||||
Contributions for any other purpose.
|
||||
|
||||
This version of the Agreement allows an entity (the "Corporation") to
|
||||
submit Contributions to the Project, to authorize Contributions
|
||||
submitted by its designated employees to the Project, and to grant
|
||||
copyright and patent licenses thereto to the Copyright Holders.
|
||||
|
||||
If you have not already done so, please complete and sign, then scan and
|
||||
email a pdf file of this Agreement to digits@nvidia.com.
|
||||
Please read this document carefully before signing and keep a copy for
|
||||
your records.
|
||||
|
||||
Corporation name: ________________________________________________
|
||||
|
||||
Corporation address: ________________________________________________
|
||||
|
||||
________________________________________________
|
||||
|
||||
________________________________________________
|
||||
|
||||
Point of Contact: ________________________________________________
|
||||
|
||||
E-Mail: ________________________________________________
|
||||
|
||||
Telephone: _____________________ Fax: _____________________
|
||||
|
||||
|
||||
You accept and agree to the following terms and conditions for Your
|
||||
present and future Contributions submitted to the Project. In
|
||||
return, the Copyright Holders shall not use Your Contributions in a way
|
||||
that is contrary to the public benefit or inconsistent with its nonprofit
|
||||
status and bylaws in effect at the time of the Contribution. Except
|
||||
for the license granted herein to the Copyright Holders and recipients of
|
||||
software distributed by the Copyright Holders, You reserve all right, title,
|
||||
and interest in and to Your Contributions.
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"You" (or "Your") shall mean the copyright owner or legal entity
|
||||
authorized by the copyright owner that is making this Agreement
|
||||
with the Copyright Holders. For legal entities, the entity making a
|
||||
Contribution and all other entities that control, are controlled by,
|
||||
or are under common control with that entity are considered to be a
|
||||
single Contributor. For the purposes of this definition, "control"
|
||||
means (i) the power, direct or indirect, to cause the direction or
|
||||
management of such entity, whether by contract or otherwise, or
|
||||
(ii) ownership of fifty percent (50%) or more of the outstanding
|
||||
shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"Contribution" shall mean the code, documentation or other original
|
||||
works of authorship expressly identified in Schedule B, as well as
|
||||
any original work of authorship, including
|
||||
any modifications or additions to an existing work, that is intentionally
|
||||
submitted by You to the Copyright Holders for inclusion in, or
|
||||
documentation of, any of the products owned or managed by the
|
||||
Copyright Holders (the "Work"). For the purposes of this definition,
|
||||
"submitted" means any form of electronic, verbal, or written
|
||||
communication sent to the Copyright Holders or its representatives,
|
||||
including but not limited to communication on electronic mailing
|
||||
lists, source code control systems, and issue tracking systems
|
||||
that are managed by, or on behalf of, the Copyright Holders for the
|
||||
purpose of discussing and improving the Work, but excluding
|
||||
communication that is conspicuously marked or otherwise designated
|
||||
in writing by You as "Not a Contribution."
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions
|
||||
of this Agreement, You hereby grant to the Copyright Holders and to
|
||||
recipients of software distributed by the Copyright Holders a
|
||||
perpetual, worldwide, non-exclusive, no-charge, royalty-free,
|
||||
irrevocable copyright license to reproduce, prepare derivative works
|
||||
of, publicly display, publicly perform, sublicense, and distribute
|
||||
Your Contributions and such derivative works.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this Agreement, You hereby grant to the Copyright Holders and to
|
||||
recipients of software distributed by the Copyright Holders
|
||||
a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
|
||||
irrevocable (except as stated in this section) patent license
|
||||
to make, have made, use, offer to sell, sell, import, and otherwise
|
||||
transfer the Work, where such license applies only to those
|
||||
patent claims licensable by You that are necessarily infringed
|
||||
by Your Contribution(s) alone or by combination of Your Contribution(s)
|
||||
with the Work to which such Contribution(s) were submitted.
|
||||
If any entity institutes patent litigation against You or any
|
||||
other entity (including a cross-claim or counterclaim in a lawsuit)
|
||||
alleging that your Contribution, or the Work to which you have
|
||||
contributed, constitutes direct or contributory patent infringement,
|
||||
then any patent licenses granted to that entity under this Agreement
|
||||
for that Contribution or Work shall terminate as of the date such
|
||||
litigation is filed.
|
||||
|
||||
4. You represent that You are legally entitled to grant the above
|
||||
license. You represent further that each employee of the
|
||||
Corporation designated on Schedule A below (or in a subsequent
|
||||
written modification to that Schedule) is authorized to submit
|
||||
Contributions on behalf of the Corporation.
|
||||
|
||||
5. You represent that each of Your Contributions is Your original
|
||||
creation (see section 7 for submissions on behalf of others).
|
||||
|
||||
6. You are not expected to provide support for Your Contributions,
|
||||
except to the extent You desire to provide support. You may provide
|
||||
support for free, for a fee, or not at all. Unless required by
|
||||
applicable law or agreed to in writing, You provide Your
|
||||
Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
|
||||
OF ANY KIND, either express or implied, including, without
|
||||
limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT,
|
||||
MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
7. Should You wish to submit work that is not Your original creation,
|
||||
You may submit it to the Copyright Holders separately from any
|
||||
Contribution, identifying the complete details of its source and
|
||||
of any license or other restriction (including, but not limited
|
||||
to, related patents, trademarks, and license agreements) of which
|
||||
you are personally aware, and conspicuously marking the work as
|
||||
"Submitted on behalf of a third-party: [named here]".
|
||||
|
||||
8. It is your responsibility to notify the Copyright Holders when any change
|
||||
is required to the list of designated employees authorized to submit
|
||||
Contributions on behalf of the Corporation, or to the Corporation's
|
||||
Point of Contact with the Copyright Holders.
|
||||
|
||||
|
||||
|
||||
Please sign: __________________________________ Date: _______________
|
||||
|
||||
Title: __________________________________
|
||||
|
||||
Corporation: __________________________________
|
||||
|
||||
|
||||
|
||||
|
||||
Schedule A
|
||||
|
||||
[Initial list of designated employees. NB: authorization is not
|
||||
tied to particular Contributions.]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Schedule B
|
||||
|
||||
[Identification of optional concurrent software grant. Would be
|
||||
left blank or omitted if there is no concurrent software grant.]
|
||||
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
FROM golang:1.5
|
||||
|
||||
RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
|
||||
NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \
|
||||
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/7fa2af80.pub && \
|
||||
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +2 > cudasign.pub && \
|
||||
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
|
||||
echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64 /" > /etc/apt/sources.list.d/cuda.list
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cuda-cudart-dev-6-5=6.5-19 \
|
||||
cuda-misc-headers-6-5=6.5-19 && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN objcopy --redefine-sym memcpy=memcpy@GLIBC_2.2.5 /usr/local/cuda-6.5/lib64/libcudart_static.a
|
||||
|
||||
RUN NVIDIA_GDK_SUM=1e32e58f69fe29ee67b845233e7aa9347f37994463252bccbc8bfc8a7104ab5a && \
|
||||
wget -O gdk.run -q http://developer.download.nvidia.com/compute/cuda/7.5/Prod/local_installers/cuda_352_39_gdk_linux.run && \
|
||||
echo "$NVIDIA_GDK_SUM gdk.run" | sha256sum -c --strict - && \
|
||||
chmod +x gdk.run && ./gdk.run --silent && rm gdk.run
|
||||
|
||||
VOLUME /go/bin
|
||||
WORKDIR /go/src/github.com/NVIDIA/nvidia-docker/src
|
||||
COPY src .
|
||||
|
||||
ENV CGO_CFLAGS "-I /usr/local/cuda-6.5/include -I /usr/include/nvidia/gdk"
|
||||
ENV CGO_LDFLAGS "-L /usr/local/cuda-6.5/lib64"
|
||||
RUN go get -v ./...
|
||||
|
||||
ARG USER_ID
|
||||
RUN useradd --non-unique --uid $USER_ID nvidia
|
||||
USER nvidia
|
||||
|
||||
ARG CR_NAME
|
||||
ARG CR_EMAIL
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ARG PKG_ARCH
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
|
||||
CMD go install -v -ldflags="-s -X main.Version=$VERSION" ./...
|
|
@ -1,53 +0,0 @@
|
|||
FROM ppc64le/golang:1.6.3
|
||||
|
||||
RUN echo "deb http://httpredir.debian.org/debian stretch main" >> /etc/apt/sources.list && \
|
||||
echo "deb http://httpredir.debian.org/debian stretch-updates main" >> /etc/apt/sources.list && \
|
||||
echo "deb http://security.debian.org stretch/updates main" >> /etc/apt/sources.list && \
|
||||
apt-get clean && apt-get update && \
|
||||
apt-get upgrade -y \
|
||||
binutils \
|
||||
libc6 \
|
||||
libc6-dev \
|
||||
libc-bin && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN NVIDIA_GPGKEY_SUM=bd841d59a27a406e513db7d405550894188a4c1cd96bf8aa4f82f1b39e0b5c1c && \
|
||||
NVIDIA_GPGKEY_FPR=889bee522da690103c4b085ed88c3d385c37d3be && \
|
||||
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/GPGKEY && \
|
||||
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +2 > cudasign.pub && \
|
||||
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
|
||||
echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/ppc64el /" > /etc/apt/sources.list.d/cuda.list
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cuda-cudart-dev-7-5=7.5-23 \
|
||||
cuda-misc-headers-7-5=7.5-23 && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN NVIDIA_GDK_SUM=064678e29d39f0c21f4b66c5e2fb18ba65fd9bc3372d0b319c31cab0e791fc1c && \
|
||||
curl -fsSL -o gdk.run http://developer.download.nvidia.com/compute/cuda/7.5/Prod/gdk/gdk_linux_ppc64le_352_79_release.run && \
|
||||
echo "$NVIDIA_GDK_SUM gdk.run" | sha256sum -c --strict - && \
|
||||
chmod +x gdk.run && ./gdk.run --silent && rm gdk.run
|
||||
|
||||
VOLUME /go/bin
|
||||
WORKDIR /go/src/github.com/NVIDIA/nvidia-docker/src
|
||||
COPY src .
|
||||
|
||||
ENV CGO_CFLAGS "-I /usr/local/cuda-7.5/include -I /usr/include/nvidia/gdk"
|
||||
ENV CGO_LDFLAGS "-L /usr/local/cuda-7.5/lib64"
|
||||
|
||||
RUN go get -v ./...
|
||||
|
||||
ARG USER_ID
|
||||
RUN useradd --non-unique --uid $USER_ID nvidia
|
||||
USER nvidia
|
||||
|
||||
ARG CR_NAME
|
||||
ARG CR_EMAIL
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ARG PKG_ARCH
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
|
||||
CMD go install -v -ldflags="-s -X main.Version=$VERSION" ./...
|
|
@ -0,0 +1,36 @@
|
|||
FROM centos:7
|
||||
|
||||
# packaging dependencies
|
||||
RUN yum install -y \
|
||||
rpm-build && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
# packaging
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ARG RUNTIME_VERSION
|
||||
ARG DOCKER_VERSION
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
ENV RUNTIME_VERSION $RUNTIME_VERSION
|
||||
ENV DOCKER_VERSION $DOCKER_VERSION
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-runtime-$PKG_VERS/SOURCES
|
||||
RUN mkdir -p $DIST_DIR
|
||||
|
||||
COPY nvidia-docker $DIST_DIR
|
||||
COPY daemon.json $DIST_DIR
|
||||
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY rpm .
|
||||
|
||||
CMD rpmbuild --clean -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "version $VERSION" \
|
||||
-D "release $RELEASE" \
|
||||
-D "runtime_version $RUNTIME_VERSION" \
|
||||
-D "docker_version $DOCKER_VERSION" \
|
||||
SPECS/nvidia-docker2.spec && \
|
||||
mv RPMS/noarch/*.rpm /dist
|
|
@ -1,37 +0,0 @@
|
|||
FROM ubuntu:14.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
vim-nox \
|
||||
dh-make \
|
||||
dh-systemd \
|
||||
fakeroot \
|
||||
build-essential \
|
||||
devscripts && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ARG USER_ID
|
||||
ARG CR_NAME
|
||||
ARG CR_EMAIL
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ARG PKG_ARCH
|
||||
|
||||
VOLUME /dist
|
||||
VOLUME /build
|
||||
WORKDIR /tmp/$PKG_NAME-$PKG_VERS
|
||||
|
||||
ENV DEBFULLNAME $CR_NAME
|
||||
ENV DEBEMAIL $CR_EMAIL
|
||||
ENV REVISION $PKG_VERS-$PKG_REV
|
||||
ENV ARCHITECTURE $PKG_ARCH
|
||||
|
||||
RUN useradd --non-unique --uid $USER_ID nvidia && chown nvidia: .
|
||||
USER nvidia
|
||||
|
||||
CMD tar -xf /dist/*.tar.xz && \
|
||||
read -p "Update changelog (y/n)? " yn && [ "$yn" = "y" ] && \
|
||||
dch -c /build/deb/changelog -v $REVISION --no-auto-nmu ; \
|
||||
dh_make -y -s -c bsd -d -t /build/deb -f /dist/*.tar.xz && \
|
||||
debuild --preserve-env --dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/*.deb /dist
|
|
@ -1,39 +0,0 @@
|
|||
FROM ppc64le/ubuntu:14.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
vim-nox \
|
||||
dh-make \
|
||||
dh-systemd \
|
||||
fakeroot \
|
||||
build-essential \
|
||||
devscripts && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ARG USER_ID
|
||||
ARG CR_NAME
|
||||
ARG CR_EMAIL
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
# Unused but kept for consistency with x86
|
||||
ARG PKG_ARCH
|
||||
|
||||
VOLUME /dist
|
||||
VOLUME /build
|
||||
WORKDIR /tmp/$PKG_NAME-$PKG_VERS
|
||||
|
||||
ENV DEBFULLNAME $CR_NAME
|
||||
ENV DEBEMAIL $CR_EMAIL
|
||||
ENV REVISION $PKG_VERS-$PKG_REV
|
||||
# Hard-coded since ppc64le doesn't have the issue of amd64 vs x86_64.
|
||||
ENV ARCHITECTURE ppc64el
|
||||
|
||||
RUN useradd --non-unique --uid $USER_ID nvidia && chown nvidia: .
|
||||
USER nvidia
|
||||
|
||||
CMD tar -xf /dist/*.tar.xz && \
|
||||
read -p "Update changelog (y/n)? " yn && [ "$yn" = "y" ] && \
|
||||
dch -c /build/deb/changelog -v $REVISION --no-auto-nmu ; \
|
||||
dh_make -y -s -c bsd -d -t /build/deb -f /dist/*.tar.xz && \
|
||||
debuild --preserve-env --dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/*.deb /dist
|
|
@ -1,47 +0,0 @@
|
|||
FROM centos:7
|
||||
|
||||
RUN yum install -y \
|
||||
vim \
|
||||
rpm-build && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
RUN sed -i 's/include_release_info = 1/include_release_info = 0/' /usr/share/vim/vim74/ftplugin/spec.vim && \
|
||||
echo 'let g:spec_chglog_format = "%a %b %d %Y ".$VENDOR." <".$EMAIL."> ".$VERSION."-".$REVISION' >> /etc/vimrc && \
|
||||
echo 'autocmd VimEnter *.spec execute "normal \\c"' >> /etc/vimrc
|
||||
|
||||
ARG USER_ID
|
||||
ARG CR_NAME
|
||||
ARG CR_EMAIL
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ARG PKG_ARCH
|
||||
|
||||
VOLUME /dist
|
||||
VOLUME /build
|
||||
WORKDIR /tmp/$PKG_NAME-$PKG_VERS
|
||||
|
||||
ENV VENDOR $CR_NAME
|
||||
ENV EMAIL $CR_EMAIL
|
||||
ENV NAME $PKG_NAME
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV REVISION $PKG_REV
|
||||
ENV ARCHITECTURE $PKG_ARCH
|
||||
|
||||
RUN useradd --non-unique --uid $USER_ID nvidia && chown nvidia: .
|
||||
USER nvidia
|
||||
|
||||
CMD read -p "Update changelog (y/n)? " yn && [ "$yn" = "y" ] && \
|
||||
vim /build/rpm/SPECS/$NAME.spec ; \
|
||||
cp -Lr /build/rpm/* . && \
|
||||
cp /dist/*.tar.xz SOURCES && \
|
||||
rpmbuild --clean -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "vendor $VENDOR" \
|
||||
-D "email $EMAIL" \
|
||||
-D "name $NAME" \
|
||||
-D "version $VERSION" \
|
||||
-D "revision $REVISION" \
|
||||
-D "architecture $ARCHITECTURE" \
|
||||
SPECS/$NAME.spec && \
|
||||
mv RPMS/$ARCHITECTURE/*.rpm /dist
|
|
@ -1,47 +0,0 @@
|
|||
FROM ibmcom/centos-ppc64le:7
|
||||
|
||||
RUN yum install -y \
|
||||
vim \
|
||||
rpm-build && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
RUN sed -i 's/include_release_info = 1/include_release_info = 0/' /usr/share/vim/vim74/ftplugin/spec.vim && \
|
||||
echo 'let g:spec_chglog_format = "%a %b %d %Y ".$VENDOR." <".$EMAIL."> ".$VERSION."-".$REVISION' >> /etc/vimrc && \
|
||||
echo 'autocmd VimEnter *.spec execute "normal \\c"' >> /etc/vimrc
|
||||
|
||||
ARG USER_ID
|
||||
ARG CR_NAME
|
||||
ARG CR_EMAIL
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ARG PKG_ARCH
|
||||
|
||||
VOLUME /dist
|
||||
VOLUME /build
|
||||
WORKDIR /tmp/$PKG_NAME-$PKG_VERS
|
||||
|
||||
ENV VENDOR $CR_NAME
|
||||
ENV EMAIL $CR_EMAIL
|
||||
ENV NAME $PKG_NAME
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV REVISION $PKG_REV
|
||||
ENV ARCHITECTURE $PKG_ARCH
|
||||
|
||||
RUN useradd --non-unique --uid $USER_ID nvidia && chown nvidia: .
|
||||
USER nvidia
|
||||
|
||||
CMD read -p "Update changelog (y/n)? " yn && [ "$yn" = "y" ] && \
|
||||
vim /build/rpm/SPECS/$NAME.spec ; \
|
||||
cp -Lr /build/rpm/* . && \
|
||||
cp /dist/*.tar.xz SOURCES && \
|
||||
rpmbuild --clean -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "vendor $VENDOR" \
|
||||
-D "email $EMAIL" \
|
||||
-D "name $NAME" \
|
||||
-D "version $VERSION" \
|
||||
-D "revision $REVISION" \
|
||||
-D "architecture $ARCHITECTURE" \
|
||||
SPECS/$NAME.spec && \
|
||||
mv RPMS/$ARCHITECTURE/*.rpm /dist
|
|
@ -0,0 +1,40 @@
|
|||
FROM ubuntu:xenial
|
||||
|
||||
# packaging dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
dh-make \
|
||||
fakeroot \
|
||||
build-essential \
|
||||
devscripts && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# packaging
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ARG RUNTIME_VERSION
|
||||
ARG DOCKER_VERSION
|
||||
|
||||
ENV DEBFULLNAME "NVIDIA CORPORATION"
|
||||
ENV DEBEMAIL "cudatools@nvidia.com"
|
||||
ENV REVISION "$PKG_VERS-$PKG_REV"
|
||||
ENV RUNTIME_VERSION $RUNTIME_VERSION
|
||||
ENV DOCKER_VERSION $DOCKER_VERSION
|
||||
ENV DISTRIB "UNRELEASED"
|
||||
ENV SECTION ""
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-docker2-$PKG_VERS
|
||||
RUN mkdir -p $DIST_DIR
|
||||
|
||||
# nvidia-docker 2.0
|
||||
COPY nvidia-docker $DIST_DIR/nvidia-docker
|
||||
COPY daemon.json $DIST_DIR/daemon.json
|
||||
|
||||
WORKDIR $DIST_DIR
|
||||
COPY debian ./debian
|
||||
|
||||
RUN dch --create --package nvidia-docker2 -v "$REVISION" "v$REVISION" -D "$DISTRIB" && \
|
||||
dch -r ""
|
||||
|
||||
CMD debuild --preserve-env --dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/*.deb /dist
|
2
LICENSE
2
LICENSE
|
@ -1,4 +1,4 @@
|
|||
Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
|
145
Makefile
145
Makefile
|
@ -1,87 +1,90 @@
|
|||
# Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
NV_DOCKER ?= docker
|
||||
DOCKER ?= docker
|
||||
|
||||
prefix ?= /usr/local
|
||||
exec_prefix ?= $(prefix)
|
||||
bindir ?= $(exec_prefix)/bin
|
||||
VERSION := 2.0.1
|
||||
PKG_REV := 1
|
||||
RUNTIME_VERSION := 1.1.0
|
||||
|
||||
CR_NAME := NVIDIA CORPORATION
|
||||
CR_EMAIL := digits@nvidia.com
|
||||
PKG_NAME := nvidia-docker
|
||||
PKG_VERS := 1.0.1
|
||||
PKG_REV := 1
|
||||
ifneq ($(MAKECMDGOALS),rpm)
|
||||
PKG_ARCH := amd64
|
||||
else
|
||||
PKG_ARCH := x86_64
|
||||
endif
|
||||
DIST_DIR := $(CURDIR)/dist
|
||||
|
||||
# Mirror the BUILD_ARCH from the build Dockerfile
|
||||
BUILD_ARCH = .$(shell uname -m)
|
||||
ifneq ($(BUILD_ARCH),.ppc64le)
|
||||
BUILD_ARCH =
|
||||
else
|
||||
PKG_ARCH = ppc64le
|
||||
endif
|
||||
.NOTPARALLEL:
|
||||
.PHONY: all
|
||||
|
||||
BIN_DIR := $(CURDIR)/bin
|
||||
DIST_DIR := $(CURDIR)/dist
|
||||
BUILD_DIR := $(CURDIR)/build
|
||||
DOCKER_BIN := $(BIN_DIR)/nvidia-docker
|
||||
PLUGIN_BIN := $(BIN_DIR)/nvidia-docker-plugin
|
||||
all: xenial centos7
|
||||
|
||||
DOCKER_VERS := $(shell $(NV_DOCKER) version -f '{{.Client.Version}}')
|
||||
DOCKER_VERS_MAJ := $(shell echo $(DOCKER_VERS) | cut -d. -f1)
|
||||
DOCKER_VERS_MIN := $(shell echo $(DOCKER_VERS) | cut -d. -f2)
|
||||
xenial: 17.09.0-xenial 17.06.2-xenial 17.03.2-xenial 1.13.1-xenial 1.12.6-xenial
|
||||
|
||||
DOCKER_RMI := $(NV_DOCKER) rmi
|
||||
DOCKER_RUN := $(NV_DOCKER) run --rm --net=host
|
||||
DOCKER_IMAGES := $(NV_DOCKER) images -q $(PKG_NAME)
|
||||
DOCKER_BUILD := $(NV_DOCKER) build --build-arg USER_ID="$(shell id -u)" \
|
||||
--build-arg CR_NAME="$(CR_NAME)" \
|
||||
--build-arg CR_EMAIL="$(CR_EMAIL)" \
|
||||
--build-arg PKG_NAME="$(PKG_NAME)" \
|
||||
--build-arg PKG_VERS="$(PKG_VERS)" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
--build-arg PKG_ARCH="$(PKG_ARCH)"
|
||||
centos7: 17.09.0.ce-centos7 17.06.2.ce-centos7 17.03.2.ce-centos7 1.12.6-centos7
|
||||
|
||||
.PHONY: all build install uninstall clean distclean tarball deb rpm
|
||||
17.09.0-xenial:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker17.09.0-1" \
|
||||
--build-arg DOCKER_VERSION="docker-ce (= 17.09.0~ce-0~ubuntu) | docker-ee (= 17.09.0~ee-0~ubuntu)" \
|
||||
--build-arg PKG_VERS="$(VERSION)+docker17.09.0" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.xenial .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
|
||||
|
||||
all: build
|
||||
17.06.2-xenial:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker17.06.2-1" \
|
||||
--build-arg DOCKER_VERSION="docker-ce (= 17.06.2~ce-0~ubuntu) | docker-ee (= 17.06.2~ee-0~ubuntu)" \
|
||||
--build-arg PKG_VERS="$(VERSION)+docker17.06.2" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.xenial .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
|
||||
|
||||
build: distclean
|
||||
@mkdir -p $(BIN_DIR)
|
||||
@$(DOCKER_BUILD) -t $(PKG_NAME):$@ -f Dockerfile.$@$(BUILD_ARCH) $(CURDIR)
|
||||
@$(DOCKER_RUN) -v $(BIN_DIR):/go/bin:Z $(PKG_NAME):$@
|
||||
17.03.2-xenial:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker17.03.2-1" \
|
||||
--build-arg DOCKER_VERSION="docker-ce (= 17.03.2~ce-0~ubuntu-xenial) | docker-ee (= 17.03.2~ee-0~ubuntu-xenial)" \
|
||||
--build-arg PKG_VERS="$(VERSION)+docker17.03.2" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.xenial .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
|
||||
|
||||
install: build
|
||||
install -D -m 755 -t $(bindir) $(DOCKER_BIN)
|
||||
install -D -m 755 -t $(bindir) $(PLUGIN_BIN)
|
||||
1.13.1-xenial:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker1.13.1-1" \
|
||||
--build-arg DOCKER_VERSION="docker-engine(= 1.13.1-0~ubuntu-xenial)" \
|
||||
--build-arg PKG_VERS="$(VERSION)+docker1.13.1" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.xenial .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
|
||||
|
||||
uninstall:
|
||||
$(RM) $(bindir)/$(notdir $(DOCKER_BIN))
|
||||
$(RM) $(bindir)/$(notdir $(PLUGIN_BIN))
|
||||
1.12.6-xenial:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)+docker1.12.6-1" \
|
||||
--build-arg DOCKER_VERSION="docker-engine (= 1.12.6-0~ubuntu-xenial) | docker.io (= 1.12.6-0ubuntu1~16.04.1)" \
|
||||
--build-arg PKG_VERS="$(VERSION)+docker1.12.6" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.xenial .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/xenial:/dist:Z nvidia-docker2:$@
|
||||
|
||||
clean:
|
||||
-@$(DOCKER_IMAGES) | xargs $(DOCKER_RMI) 2> /dev/null
|
||||
-@$(DOCKER_RMI) golang:1.5 ubuntu:14.04 centos:7 2> /dev/null
|
||||
17.09.0.ce-centos7:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)-1.docker17.09.0" \
|
||||
--build-arg DOCKER_VERSION="docker-ce = 17.09.0.ce" \
|
||||
--build-arg PKG_VERS="$(VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV).docker17.09.0.ce" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.centos7 .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/centos7:/dist:Z nvidia-docker2:$@
|
||||
|
||||
distclean:
|
||||
@rm -rf $(BIN_DIR)
|
||||
@rm -rf $(DIST_DIR)
|
||||
17.06.2.ce-centos7:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)-1.docker17.06.2" \
|
||||
--build-arg DOCKER_VERSION="docker-ce = 17.06.2.ce" \
|
||||
--build-arg PKG_VERS="$(VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV).docker17.06.2.ce" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.centos7 .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/centos7:/dist:Z nvidia-docker2:$@
|
||||
|
||||
tarball: build
|
||||
@mkdir -p $(DIST_DIR)
|
||||
tar --transform='s;.*/;$(PKG_NAME)/;' -caf $(DIST_DIR)/$(PKG_NAME)_$(PKG_VERS)_$(PKG_ARCH).tar.xz $(BIN_DIR)/*
|
||||
@printf "\nFind tarball at $(DIST_DIR)\n\n"
|
||||
17.03.2.ce-centos7:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)-1.docker17.03.2" \
|
||||
--build-arg DOCKER_VERSION="docker-ce = 17.03.2.ce" \
|
||||
--build-arg PKG_VERS="$(VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV).docker17.03.2.ce" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.centos7 .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/centos7:/dist:Z nvidia-docker2:$@
|
||||
|
||||
deb: tarball
|
||||
@$(DOCKER_BUILD) -t $(PKG_NAME):$@ -f Dockerfile.$@$(BUILD_ARCH) $(CURDIR)
|
||||
@$(DOCKER_RUN) -ti -v $(DIST_DIR):/dist:Z -v $(BUILD_DIR):/build:Z $(PKG_NAME):$@
|
||||
@printf "\nFind packages at $(DIST_DIR)\n\n"
|
||||
|
||||
rpm: tarball
|
||||
@$(DOCKER_BUILD) -t $(PKG_NAME):$@ -f Dockerfile.$@$(BUILD_ARCH) $(CURDIR)
|
||||
@$(DOCKER_RUN) -ti -v $(DIST_DIR):/dist:Z -v $(BUILD_DIR):/build:Z $(PKG_NAME):$@
|
||||
@printf "\nFind packages at $(DIST_DIR)\n\n"
|
||||
1.12.6-centos7:
|
||||
$(DOCKER) build --build-arg RUNTIME_VERSION="$(RUNTIME_VERSION)-1.docker1.12.6" \
|
||||
--build-arg DOCKER_VERSION="docker = 2:1.12.6" \
|
||||
--build-arg PKG_VERS="$(VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV).docker1.12.6" \
|
||||
-t nvidia-docker2:$@ -f Dockerfile.centos7 .
|
||||
$(DOCKER) run --rm -v $(DIST_DIR)/centos7:/dist:Z nvidia-docker2:$@
|
||||
|
|
114
README.md
114
README.md
|
@ -1,61 +1,103 @@
|
|||
# Docker Engine Utility for NVIDIA GPUs
|
||||
|
||||
**We are beginning the transition towards [nvidia-docker 2.0](https://github.com/NVIDIA/nvidia-docker/tree/2.0), please help us test it.**
|
||||
[![GitHub license](https://img.shields.io/badge/license-New%20BSD-blue.svg?style=flat-square)](https://raw.githubusercontent.com/NVIDIA/nvidia-docker/master/LICENSE)
|
||||
[![Package repository](https://img.shields.io/badge/packages-repository-b956e8.svg?style=flat-square)](https://nvidia.github.io/nvidia-docker)
|
||||
|
||||
![nvidia-gpu-docker](https://cloud.githubusercontent.com/assets/3028125/12213714/5b208976-b632-11e5-8406-38d379ec46aa.png)
|
||||
|
||||
# Documentation
|
||||
**Warning: This project is based on an alpha release (libnvidia-container). It is already more stable than 1.0 but we need help testing it.**
|
||||
|
||||
The full documentation is available on the [repository wiki](https://github.com/NVIDIA/nvidia-docker/wiki).
|
||||
A good place to start is to understand [why nvidia-docker](https://github.com/NVIDIA/nvidia-docker/wiki/Motivation) is needed in the first place.
|
||||
## Differences with 1.0
|
||||
* Doesn't require wrapping the Docker CLI and doesn't need a separate daemon,
|
||||
* GPU isolation is now achieved with environment variable `NVIDIA_VISIBLE_DEVICES`,
|
||||
* Can enable GPU support for any Docker image. Not just the ones based on our official CUDA images,
|
||||
* Package repositories are available for Ubuntu and CentOS,
|
||||
* Uses a new implementation based on [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
|
||||
|
||||
## Removing nvidia-docker 1.0
|
||||
|
||||
# Quick start
|
||||
Version 1.0 of the nvidia-docker package must be cleanly removed before continuing.
|
||||
You must stop and remove **all** containers started with nvidia-docker 1.0.
|
||||
|
||||
Assuming the NVIDIA drivers and Docker® Engine are properly installed (see [installation](https://github.com/NVIDIA/nvidia-docker/wiki/Installation))
|
||||
|
||||
#### _Ubuntu distributions_
|
||||
#### Ubuntu distributions
|
||||
```sh
|
||||
# Install nvidia-docker and nvidia-docker-plugin
|
||||
wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.1/nvidia-docker_1.0.1-1_amd64.deb
|
||||
sudo dpkg -i /tmp/nvidia-docker*.deb && rm /tmp/nvidia-docker*.deb
|
||||
|
||||
# Test nvidia-smi
|
||||
nvidia-docker run --rm nvidia/cuda nvidia-smi
|
||||
docker volume ls -q -f driver=nvidia-docker | xargs -r -I{} -n1 docker ps -q -a -f volume={} | xargs -r docker rm -f
|
||||
sudo apt-get purge nvidia-docker
|
||||
```
|
||||
|
||||
#### _CentOS distributions_
|
||||
```sh
|
||||
# Install nvidia-docker and nvidia-docker-plugin
|
||||
wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.1/nvidia-docker-1.0.1-1.x86_64.rpm
|
||||
sudo rpm -i /tmp/nvidia-docker*.rpm && rm /tmp/nvidia-docker*.rpm
|
||||
sudo systemctl start nvidia-docker
|
||||
#### CentOS distributions
|
||||
|
||||
# Test nvidia-smi
|
||||
nvidia-docker run --rm nvidia/cuda nvidia-smi
|
||||
```
|
||||
docker volume ls -q -f driver=nvidia-docker | xargs -r -I{} -n1 docker ps -q -a -f volume={} | xargs -r docker rm -f
|
||||
sudo yum remove nvidia-docker
|
||||
```
|
||||
|
||||
#### _Other distributions_
|
||||
```sh
|
||||
# Install nvidia-docker and nvidia-docker-plugin
|
||||
wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.1/nvidia-docker_1.0.1_amd64.tar.xz
|
||||
sudo tar --strip-components=1 -C /usr/bin -xvf /tmp/nvidia-docker*.tar.xz && rm /tmp/nvidia-docker*.tar.xz
|
||||
## Installation
|
||||
|
||||
# Run nvidia-docker-plugin
|
||||
sudo -b nohup nvidia-docker-plugin > /tmp/nvidia-docker.log
|
||||
**If you have a custom `/etc/docker/daemon.json`, the `nvidia-docker2` package will override it.**
|
||||
|
||||
# Test nvidia-smi
|
||||
nvidia-docker run --rm nvidia/cuda nvidia-smi
|
||||
#### Ubuntu distributions
|
||||
|
||||
1. Install the repository for your distribution by following the instructions [here](http://nvidia.github.io/nvidia-docker/).
|
||||
2. Install the `nvidia-docker2` package and restart the Docker daemon:
|
||||
```
|
||||
sudo apt-get install nvidia-docker2
|
||||
sudo pkill -SIGHUP dockerd
|
||||
```
|
||||
|
||||
#### _ppc64le (POWER) Archictecture_
|
||||
There is limited build support for ppc64le. Running `make deb` will build the nvidia-docker deb for ppc64le (if run on a ppc64le system). If the deb install fails because you have the 'docker.io' (>= v1.9) package installed, but not the 'docker-engine' package, you can force-install. There is currently no docker-provided docker-engine repository for ppc64le.
|
||||
#### CentOS distributions
|
||||
1. Install the repository for your distribution by following the instructions [here](http://nvidia.github.io/nvidia-docker/).
|
||||
2. Install the `nvidia-docker2` package and restart the Docker daemon:
|
||||
```
|
||||
sudo yum install nvidia-docker2
|
||||
sudo pkill -SIGHUP dockerd
|
||||
```
|
||||
|
||||
Not all the build targets for ppc64le have been implemented. If you would like for a Dockerfile to be created to enable a ppc64le target, please open an issue.
|
||||
## Usage
|
||||
|
||||
# Issues and Contributing
|
||||
#### NVIDIA runtime
|
||||
nvidia-docker registers a new container runtime to the Docker daemon.
|
||||
You must select the `nvidia` runtime when using `docker run`:
|
||||
```
|
||||
docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
|
||||
```
|
||||
|
||||
**A signed copy of the [Contributor License Agreement](https://raw.githubusercontent.com/NVIDIA/nvidia-docker/master/CLA) needs to be provided to digits@nvidia.com before any change can be accepted.**
|
||||
#### GPU isolation
|
||||
Set the environment variable `NVIDIA_VISIBLE_DEVICES` in the container:
|
||||
```
|
||||
docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=0 --rm nvidia/cuda nvidia-smi
|
||||
```
|
||||
|
||||
#### Non-CUDA image:
|
||||
Setting `NVIDIA_VISIBLE_DEVICES` will enable GPU support for any container image:
|
||||
```
|
||||
docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --rm debian:stretch nvidia-smi
|
||||
```
|
||||
|
||||
## Advanced
|
||||
|
||||
#### Backward compatibility
|
||||
|
||||
To help transitioning code from 1.0 to 2.0, a bash script is provided in `/usr/bin/nvidia-docker` for backward compatibility.
|
||||
It will automatically inject the `--runtime=nvidia` argument and convert `NV_GPU` to `NVIDIA_VISIBLE_DEVICES`.
|
||||
|
||||
#### Existing `daemon.json`
|
||||
If you have a custom `/etc/docker/daemon.json`, the `nvidia-docker2` package will override it.
|
||||
In this case, it is recommended to install [nvidia-container-runtime](https://github.com/nvidia/nvidia-container-runtime#installation) instead and register the new runtime manually.
|
||||
|
||||
#### Default runtime
|
||||
The default runtime used by the Docker® Engine is [runc](https://github.com/opencontainers/runc), our runtime can become the default one by configuring the docker daemon with `--default-runtime=nvidia`.
|
||||
Doing so will remove the need to add the `--runtime=nvidia` argument to `docker run`.
|
||||
It is also the only way to have GPU access during `docker build`.
|
||||
|
||||
#### Environment variables
|
||||
The behavior of the runtime can be modified through environment variables (such as `NVIDIA_VISIBLE_DEVICES`).
|
||||
Those environment variables are consumed by [nvidia-container-runtime](https://github.com/nvidia/nvidia-container-runtime) and are documented [here](https://github.com/nvidia/nvidia-container-runtime#environment-variables-oci-spec).
|
||||
Our official CUDA images use default values for these variables.
|
||||
|
||||
## Issues and Contributing
|
||||
|
||||
A signed copy of the [Contributor License Agreement](https://raw.githubusercontent.com/NVIDIA/nvidia-docker/master/CLA) needs to be provided to <a href="mailto:digits@nvidia.com">digits@nvidia.com</a> before any change can be accepted.
|
||||
|
||||
* Please let us know by [filing a new issue](https://github.com/NVIDIA/nvidia-docker/issues/new)
|
||||
* You can contribute by opening a [pull request](https://help.github.com/articles/using-pull-requests/)
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
[Unit]
|
||||
Description=NVIDIA Docker plugin
|
||||
Documentation=https://github.com/NVIDIA/nvidia-docker/wiki
|
||||
After=local-fs.target network.target
|
||||
Wants=docker.service
|
||||
|
||||
[Service]
|
||||
Environment="SOCK_DIR=/var/lib/nvidia-docker"
|
||||
Environment="SPEC_FILE=/etc/docker/plugins/nvidia-docker.spec"
|
||||
|
||||
User=nvidia-docker
|
||||
PermissionsStartOnly=true
|
||||
Restart=on-failure
|
||||
RestartSec=1
|
||||
TimeoutStartSec=0
|
||||
TimeoutStopSec=20
|
||||
|
||||
ExecStart=/usr/bin/nvidia-docker-plugin -s $SOCK_DIR
|
||||
ExecStartPost=/bin/sh -c '/bin/mkdir -p $( dirname $SPEC_FILE )'
|
||||
ExecStartPost=/bin/sh -c '/bin/echo unix://$SOCK_DIR/nvidia-docker.sock > $SPEC_FILE'
|
||||
ExecStopPost=/bin/rm -f $SPEC_FILE
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
|
@ -1,78 +0,0 @@
|
|||
nvidia-docker (1.0.1-1) trusty; urgency=low
|
||||
|
||||
* Support for Docker 17.03 including EE and CE (Closes: #323, #324)
|
||||
* Load UVM unconditionally
|
||||
* Fix Docker argument parsing (Closes: #295)
|
||||
* Fix images pull output (Closes: #310)
|
||||
|
||||
-- NVIDIA CORPORATION <digits@nvidia.com> Fri, 03 Mar 2017 00:59:14 +0000
|
||||
|
||||
nvidia-docker (1.0.0-1) trusty; urgency=low
|
||||
|
||||
* Support for Docker 1.13
|
||||
* Fix CPU affinity reporting on systems where NUMA is disabled (Closes: #198)
|
||||
* Fix premature EOF in the remote API responses (Closes: #123)
|
||||
* Add support for the VolumeDriver.Capabilities plugin endpoint
|
||||
* Enable ppc64le library lookup (Closes: #194)
|
||||
* Fix parsing of DOCKER_HOST for unix domain sockets (Closes: #119)
|
||||
|
||||
-- NVIDIA CORPORATION <digits@nvidia.com> Wed, 18 Jan 2017 21:44:42 +0000
|
||||
|
||||
nvidia-docker (1.0.0~rc.3-1) trusty; urgency=low
|
||||
|
||||
* Support for Docker 1.12
|
||||
* Add volume mount options support to the nvidia package
|
||||
* Export the nvidia-uvm-tools device
|
||||
* Provide the libcuda.so symlink as part of the driver volume (Closes: #103)
|
||||
* Use relative symlinks inside the volumes
|
||||
* Disable CUDA unified memory
|
||||
|
||||
-- NVIDIA CORPORATION <digits@nvidia.com> Fri, 17 Jun 2016 22:08:11 +0000
|
||||
|
||||
nvidia-docker (1.0.0~rc.2-1) trusty; urgency=low
|
||||
|
||||
* Allow UUIDs to be used in NV_GPU and docker/cli RestAPI endpoint
|
||||
* Change the plugin usage with version information (Closes: #90)
|
||||
* Remove the volume setup command (Closes: #96)
|
||||
* Add support for the Pascal architecture
|
||||
|
||||
-- NVIDIA CORPORATION <digits@nvidia.com> Sat, 28 May 2016 00:18:44 +0000
|
||||
|
||||
nvidia-docker (1.0.0~rc-1) trusty; urgency=low
|
||||
|
||||
* Add /docker/cli/json RestAPI endpoint (Closes: #39, #91)
|
||||
* Fix support for Docker 1.9 (Closes: #83)
|
||||
* Handle gracefully devices unsupported by NVML (Closes: #40)
|
||||
* Improve error reporting
|
||||
* Support for Docker 1.11 (Closes: #89, #84, #77, #73)
|
||||
* Add NVIDIA Docker version output
|
||||
* Improve init scripts and add support for systemd
|
||||
* Query CPU affinity through sysfs instead of NVML (Closes: #65)
|
||||
* Load UVM before anything else
|
||||
|
||||
-- NVIDIA CORPORATION <digits@nvidia.com> Tue, 03 May 2016 17:44:36 -0700
|
||||
|
||||
nvidia-docker (1.0.0~beta.3-1) trusty; urgency=low
|
||||
|
||||
* Remove driver hard dependency (NVML)
|
||||
* Improve error handling and REST API output
|
||||
* Support for 364 drivers
|
||||
* Preventive removal of the plugin socket
|
||||
|
||||
-- NVIDIA CORPORATION <digits@nvidia.com> Mon, 28 Mar 2016 16:48:51 -0700
|
||||
|
||||
nvidia-docker (1.0.0~beta.2-1) trusty; urgency=low
|
||||
|
||||
* Support for Docker 1.10 (Closes: #46)
|
||||
* Support for Docker plugin API v1.2
|
||||
* Support for 361 drivers
|
||||
* Add copy strategy for cross-device volumes (Closes: #47)
|
||||
|
||||
-- NVIDIA CORPORATION <digits@nvidia.com> Mon, 07 Mar 2016 11:41:21 -0800
|
||||
|
||||
nvidia-docker (1.0.0~beta-1) trusty; urgency=low
|
||||
|
||||
* Initial release (Closes: #33)
|
||||
|
||||
-- NVIDIA CORPORATION <digits@nvidia.com> Mon, 08 Feb 2016 11:17:52 -0800
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
Source: #PACKAGE#
|
||||
Section: devel
|
||||
Priority: optional
|
||||
Maintainer: #USERNAME# <#EMAIL#>
|
||||
Build-Depends: #BUILD_DEPS#, dh-systemd
|
||||
Standards-Version: #POLICY#
|
||||
Homepage: https://github.com/NVIDIA/nvidia-docker/wiki
|
||||
Vcs-Git: https://github.com/NVIDIA/nvidia-docker
|
||||
Vcs-Browser: https://github.com/NVIDIA/nvidia-docker
|
||||
|
||||
Package: #PACKAGE#
|
||||
Architecture: #ARCHITECTURE#
|
||||
Depends: ${misc:Depends}, ${shlibs:Depends}, adduser, docker-engine (>= 1.9.0) | docker-ce | docker-ee, libcap2-bin
|
||||
Description: NVIDIA Docker container tools
|
||||
NVIDIA Docker provides utilities to extend the Docker CLI allowing users
|
||||
to build and run GPU applications as lightweight containers.
|
|
@ -1,36 +0,0 @@
|
|||
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: #PACKAGE#
|
||||
Source: https://github.com/NVIDIA/nvidia-docker
|
||||
|
||||
Files: *
|
||||
Copyright: #YEAR# #USERNAME# <#EMAIL#>
|
||||
License: BSD-3-Clause
|
||||
|
||||
Files: debian/*
|
||||
Copyright: #YEAR# #USERNAME# <#EMAIL#>
|
||||
License: BSD-3-Clause
|
||||
|
||||
License: BSD-3-Clause
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of #USERNAME# nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -1,2 +0,0 @@
|
|||
# NVIDIA Docker plugin daemon options
|
||||
NVIDIA_DOCKER_PLUGIN_OPTS="-s /var/lib/nvidia-docker"
|
|
@ -1 +0,0 @@
|
|||
/var/lib/nvidia-docker
|
|
@ -1 +0,0 @@
|
|||
#PACKAGE#/* /usr/bin
|
|
@ -1,3 +0,0 @@
|
|||
improbable-bug-number-in-closes
|
||||
hardening-no-relro
|
||||
binary-without-manpage
|
|
@ -1,32 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
NVIDIA_DOCKER_USER=#PACKAGE#
|
||||
NVIDIA_DOCKER_ROOT=/var/lib/nvidia-docker
|
||||
NVIDIA_DOCKER_PLUGIN=/usr/bin/nvidia-docker-plugin
|
||||
|
||||
case "$1" in
|
||||
configure)
|
||||
if [ -z "$2" ]; then
|
||||
echo "Configuring user"
|
||||
id -u "$NVIDIA_DOCKER_USER" >/dev/null 2>&1 || \
|
||||
useradd -r -M -d "$NVIDIA_DOCKER_ROOT" -s /usr/sbin/nologin -c "NVIDIA Docker plugin" "$NVIDIA_DOCKER_USER"
|
||||
fi
|
||||
echo "Setting up permissions"
|
||||
chown "$NVIDIA_DOCKER_USER": "$NVIDIA_DOCKER_ROOT"
|
||||
setcap cap_fowner+pe "$NVIDIA_DOCKER_PLUGIN"
|
||||
;;
|
||||
|
||||
abort-upgrade|abort-remove|abort-deconfigure)
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "postinst called with unknown argument \`$1'" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
exit 0
|
|
@ -1,24 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
NVIDIA_DOCKER_USER=#PACKAGE#
|
||||
|
||||
case "$1" in
|
||||
purge)
|
||||
id -u "$NVIDIA_DOCKER_USER" >/dev/null 2>&1 && \
|
||||
userdel "$NVIDIA_DOCKER_USER"
|
||||
;;
|
||||
|
||||
upgrade|failed-upgrade|remove|abort-install|abort-upgrade|disappear)
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "postrm called with unknown argument \`$1'" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
exit 0
|
|
@ -1,27 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
NVIDIA_DOCKER_DRIVER=#PACKAGE#
|
||||
NVIDIA_DOCKER_ROOT=/var/lib/nvidia-docker
|
||||
|
||||
case "$1" in
|
||||
remove)
|
||||
echo "Purging NVIDIA volumes"
|
||||
docker volume ls | awk -v drv="$NVIDIA_DOCKER_DRIVER" '{if ($1 == drv) print $2}' | xargs -r docker volume rm ||
|
||||
echo "Failed to remove NVIDIA volumes, ignoring"
|
||||
find "$NVIDIA_DOCKER_ROOT" ! -wholename "$NVIDIA_DOCKER_ROOT" -type d -empty -delete || true
|
||||
;;
|
||||
|
||||
upgrade|deconfigure|failed-upgrade)
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "prerm called with unknown argument \`$1'" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
exit 0
|
|
@ -1 +0,0 @@
|
|||
../common/nvidia-docker.service
|
|
@ -1,42 +0,0 @@
|
|||
description "NVIDIA Docker plugin"
|
||||
|
||||
start on (local-filesystems and net-device-up)
|
||||
stop on runlevel [!2345]
|
||||
|
||||
normal exit 0 KILL TERM
|
||||
respawn
|
||||
respawn limit 5 10
|
||||
|
||||
kill timeout 20
|
||||
|
||||
env NVIDIA_DOCKER_USER=#PACKAGE#
|
||||
env NVIDIA_DOCKER_PLUGIN=/usr/bin/nvidia-docker-plugin
|
||||
env NVIDIA_DOCKER_PLUGIN_SPEC=/etc/docker/plugins/nvidia-docker.spec
|
||||
|
||||
script
|
||||
if [ -f /etc/default/$UPSTART_JOB ]; then
|
||||
. /etc/default/$UPSTART_JOB
|
||||
fi
|
||||
OPTS="$NVIDIA_DOCKER_PLUGIN_OPTS"
|
||||
|
||||
exec start-stop-daemon -S -u "$NVIDIA_DOCKER_USER" -c "$NVIDIA_DOCKER_USER" \
|
||||
-a "$NVIDIA_DOCKER_PLUGIN" -- $OPTS
|
||||
end script
|
||||
|
||||
post-start script
|
||||
if [ -f /etc/default/$UPSTART_JOB ]; then
|
||||
. /etc/default/$UPSTART_JOB
|
||||
fi
|
||||
OPTS="$NVIDIA_DOCKER_PLUGIN_OPTS"
|
||||
SOCK_DIR=$( echo $OPTS | grep -oP -- '-s\s+\K\S+' )
|
||||
SOCK_FILE=unix://$SOCK_DIR/nvidia-docker.sock
|
||||
|
||||
if [ -n "$SOCK_DIR" ]; then
|
||||
mkdir -p $( dirname "$NVIDIA_DOCKER_PLUGIN_SPEC" )
|
||||
echo "$SOCK_FILE" > "$NVIDIA_DOCKER_PLUGIN_SPEC"
|
||||
fi
|
||||
end script
|
||||
|
||||
post-stop script
|
||||
rm -f "$NVIDIA_DOCKER_PLUGIN_SPEC"
|
||||
end script
|
|
@ -1,5 +0,0 @@
|
|||
#! /bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
sed -i "s/#ARCHITECTURE#/${ARCHITECTURE}/" debian/control
|
|
@ -1,10 +0,0 @@
|
|||
#!/usr/bin/make -f
|
||||
# -*- makefile -*-
|
||||
|
||||
#export DH_VERBOSE=1
|
||||
|
||||
override_dh_shlibdeps:
|
||||
dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info
|
||||
|
||||
%:
|
||||
dh $@ --with=systemd
|
|
@ -1 +0,0 @@
|
|||
../../common/nvidia-docker.service
|
|
@ -1,128 +0,0 @@
|
|||
Name: %{name}
|
||||
Version: %{version}
|
||||
Release: %{revision}
|
||||
BuildArch: %{architecture}
|
||||
Group: Development Tools
|
||||
|
||||
Vendor: %{vendor}
|
||||
Packager: %{vendor} <%{email}>
|
||||
|
||||
Summary: NVIDIA Docker container tools
|
||||
URL: https://github.com/NVIDIA/nvidia-docker
|
||||
License: BSD
|
||||
|
||||
Source0: %{name}_%{version}_%{architecture}.tar.xz
|
||||
Source1: %{name}.service
|
||||
Source2: LICENSE
|
||||
|
||||
%{?systemd_requires}
|
||||
BuildRequires: systemd
|
||||
Requires: libcap
|
||||
|
||||
%define nvidia_docker_user %{name}
|
||||
%define nvidia_docker_driver %{name}
|
||||
%define nvidia_docker_root /var/lib/nvidia-docker
|
||||
|
||||
%description
|
||||
NVIDIA Docker provides utilities to extend the Docker CLI allowing users
|
||||
to build and run GPU applications as lightweight containers.
|
||||
|
||||
%prep
|
||||
%autosetup -n %{name}
|
||||
cp %{SOURCE1} %{SOURCE2} .
|
||||
|
||||
%install
|
||||
mkdir -p %{buildroot}%{_bindir}
|
||||
mkdir -p %{buildroot}%{_unitdir}
|
||||
mkdir -p %{buildroot}%{nvidia_docker_root}
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-docker
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-docker-plugin
|
||||
install -m 644 -t %{buildroot}%{_unitdir} %{name}.service
|
||||
|
||||
%files
|
||||
%license LICENSE
|
||||
%dir %{nvidia_docker_root}
|
||||
%{_bindir}/*
|
||||
%{_unitdir}/*
|
||||
|
||||
%post
|
||||
if [ $1 -eq 1 ]; then
|
||||
echo "Configuring user"
|
||||
id -u %{nvidia_docker_user} >/dev/null 2>&1 || \
|
||||
useradd -r -M -d %{nvidia_docker_root} -s /usr/sbin/nologin -c "NVIDIA Docker plugin" %{nvidia_docker_user}
|
||||
fi
|
||||
echo "Setting up permissions"
|
||||
chown %{nvidia_docker_user}: %{nvidia_docker_root}
|
||||
setcap cap_fowner+pe %{_bindir}/nvidia-docker-plugin
|
||||
%systemd_post %{name}
|
||||
|
||||
%preun
|
||||
if [ $1 -eq 0 ]; then
|
||||
echo "Purging NVIDIA volumes"
|
||||
docker volume ls | awk -v drv=%{nvidia_docker_driver} '{if ($1 == drv) print $2}' | xargs -r docker volume rm ||
|
||||
echo "Failed to remove NVIDIA volumes, ignoring"
|
||||
find %{nvidia_docker_root} ! -wholename %{nvidia_docker_root} -type d -empty -delete
|
||||
fi
|
||||
%systemd_preun %{name}
|
||||
|
||||
%postun
|
||||
if [ $1 -eq 0 ]; then
|
||||
id -u %{nvidia_docker_user} >/dev/null 2>&1 && \
|
||||
userdel %{nvidia_docker_user}
|
||||
fi
|
||||
%systemd_postun_with_restart %{name}
|
||||
|
||||
%changelog
|
||||
* Fri Mar 03 2017 NVIDIA CORPORATION <digits@nvidia.com> 1.0.1-1
|
||||
- Support for Docker 17.03 including EE and CE (Closes: #323, #324)
|
||||
- Load UVM unconditionally
|
||||
- Fix Docker argument parsing (Closes: #295)
|
||||
- Fix images pull output (Closes: #310)
|
||||
|
||||
* Wed Jan 18 2017 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0-1
|
||||
- Support for Docker 1.13
|
||||
- Fix CPU affinity reporting on systems where NUMA is disabled (Closes: #198)
|
||||
- Fix premature EOF in the remote API responses (Closes: #123)
|
||||
- Add support for the VolumeDriver.Capabilities plugin endpoint
|
||||
- Enable ppc64le library lookup (Closes: #194)
|
||||
- Fix parsing of DOCKER_HOST for unix domain sockets (Closes: #119)
|
||||
|
||||
* Fri Jun 17 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~rc.3-1
|
||||
- Support for Docker 1.12
|
||||
- Add volume mount options support to the nvidia package
|
||||
- Export the nvidia-uvm-tools device
|
||||
- Provide the libcuda.so symlink as part of the driver volume (Closes: #103)
|
||||
- Use relative symlinks inside the volumes
|
||||
- Disable CUDA unified memory
|
||||
|
||||
* Sat May 28 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~rc.2-1
|
||||
- Allow UUIDs to be used in NV_GPU and docker/cli RestAPI endpoint
|
||||
- Change the plugin usage with version information (Closes: #90)
|
||||
- Remove the volume setup command (Closes: #96)
|
||||
- Add support for the Pascal architecture
|
||||
|
||||
* Tue May 03 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~rc-1
|
||||
- Add /docker/cli/json RestAPI endpoint (Closes: #39, #91)
|
||||
- Fix support for Docker 1.9 (Closes: #83)
|
||||
- Handle gracefully devices unsupported by NVML (Closes: #40)
|
||||
- Improve error reporting
|
||||
- Support for Docker 1.11 (Closes: #89, #84, #77, #73)
|
||||
- Add NVIDIA Docker version output
|
||||
- Improve init scripts and add support for systemd
|
||||
- Query CPU affinity through sysfs instead of NVML (Closes: #65)
|
||||
- Load UVM before anything else
|
||||
|
||||
* Mon Mar 28 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~beta.3-1
|
||||
- Remove driver hard dependency (NVML)
|
||||
- Improve error handling and REST API output
|
||||
- Support for 364 drivers
|
||||
- Preventive removal of the plugin socket
|
||||
|
||||
* Mon Mar 07 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~beta.2-1
|
||||
- Support for Docker 1.10 (Closes: #46)
|
||||
- Support for Docker plugin API v1.2
|
||||
- Support for 361 drivers
|
||||
- Add copy strategy for cross-device volumes (Closes: #47)
|
||||
|
||||
* Mon Feb 08 2016 NVIDIA CORPORATION <digits@nvidia.com> 1.0.0~beta-1
|
||||
- Initial release (Closes: #33)
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"path": "/usr/bin/nvidia-container-runtime",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
Source: nvidia-docker2
|
||||
Section: @SECTION@utils
|
||||
Priority: optional
|
||||
Maintainer: NVIDIA CORPORATION <cudatools@nvidia.com>
|
||||
Standards-Version: 3.9.8
|
||||
Homepage: https://github.com/NVIDIA/nvidia-docker/wiki
|
||||
Vcs-Git: https://github.com/NVIDIA/nvidia-docker
|
||||
Vcs-Browser: https://github.com/NVIDIA/nvidia-docker
|
||||
Build-Depends: debhelper (>= 9)
|
||||
|
||||
Package: nvidia-docker2
|
||||
Architecture: all
|
||||
Breaks: nvidia-docker
|
||||
Replaces: nvidia-docker
|
||||
Depends: ${misc:Depends}, nvidia-container-runtime (= @RUNTIME_VERSION@), @DOCKER_VERSION@
|
||||
Description: nvidia-docker CLI wrapper
|
||||
Replaces nvidia-docker with a new implementation based on
|
||||
nvidia-container-runtime
|
|
@ -0,0 +1,35 @@
|
|||
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: nvidia-docker2
|
||||
Source: https://github.com/NVIDIA/nvidia-docker
|
||||
|
||||
Files: *
|
||||
Copyright: 2017 NVIDIA CORPORATION <cudatools@nvidia.com>
|
||||
License: BSD-3-Clause
|
||||
|
||||
License: BSD-3-clause
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
.
|
||||
Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
.
|
||||
Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
.
|
||||
Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,2 @@
|
|||
daemon.json /etc/docker
|
||||
nvidia-* /usr/bin
|
|
@ -0,0 +1,2 @@
|
|||
new-package-should-close-itp-bug
|
||||
binary-without-manpage
|
|
@ -0,0 +1,7 @@
|
|||
#! /bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
sed -i "s;@SECTION@;${SECTION:+$SECTION/};g" debian/control
|
||||
sed -i "s;@RUNTIME_VERSION@;${RUNTIME_VERSION};g" debian/control
|
||||
sed -i "s;@DOCKER_VERSION@;${DOCKER_VERSION};g" debian/control
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/make -f
|
||||
# -*- makefile -*-
|
||||
|
||||
#export DH_VERBOSE=1
|
||||
|
||||
%:
|
||||
dh $@
|
|
@ -0,0 +1,31 @@
|
|||
#! /bin/bash
|
||||
# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
NV_DOCKER=${NV_DOCKER:-"docker"}
|
||||
|
||||
DOCKER_ARGS=""
|
||||
NV_DOCKER_ARGS=""
|
||||
while [ $# -gt 0 ]; do
|
||||
arg=$1
|
||||
shift
|
||||
DOCKER_ARGS="$DOCKER_ARGS $arg"
|
||||
case $arg in
|
||||
run|create)
|
||||
NV_DOCKER_ARGS="--runtime=nvidia"
|
||||
if [ ! -z $NV_GPU ]; then
|
||||
NV_DOCKER_ARGS="$NV_DOCKER_ARGS -e NVIDIA_VISIBLE_DEVICES=${NV_GPU// /,}"
|
||||
fi
|
||||
break
|
||||
;;
|
||||
version)
|
||||
printf "NVIDIA Docker: 2.0.0\n"
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ ! -z $NV_DEBUG ]; then
|
||||
set -x
|
||||
fi
|
||||
|
||||
$NV_DOCKER $DOCKER_ARGS $NV_DOCKER_ARGS "$@"
|
|
@ -1,4 +1,4 @@
|
|||
Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
|
@ -0,0 +1,39 @@
|
|||
Name: nvidia-docker2
|
||||
Version: %{version}
|
||||
Release: %{release}
|
||||
BuildArch: noarch
|
||||
Group: Development Tools
|
||||
|
||||
Vendor: NVIDIA CORPORATION
|
||||
Packager: NVIDIA CORPORATION <cudatools@nvidia.com>
|
||||
|
||||
Summary: nvidia-docker CLI wrapper
|
||||
URL: https://github.com/NVIDIA/nvidia-docker
|
||||
License: BSD
|
||||
|
||||
Source0: nvidia-docker
|
||||
Source1: daemon.json
|
||||
Source2: LICENSE
|
||||
|
||||
Conflicts: nvidia-docker
|
||||
Requires: nvidia-container-runtime = %{runtime_version}
|
||||
Requires: %{docker_version}
|
||||
|
||||
%description
|
||||
Replaces nvidia-docker with a new implementation based on nvidia-container-runtime
|
||||
|
||||
%prep
|
||||
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} .
|
||||
|
||||
%install
|
||||
mkdir -p %{buildroot}%{_bindir}
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-docker
|
||||
mkdir -p %{buildroot}/etc/docker
|
||||
install -m 644 -t %{buildroot}/etc/docker daemon.json
|
||||
|
||||
%files
|
||||
%license LICENSE
|
||||
%{_bindir}/nvidia-docker
|
||||
/etc/docker/daemon.json
|
||||
|
||||
%changelog
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-centos7
|
||||
|
||||
RUN yum install -y \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/1_Utilities/bandwidthTest
|
||||
RUN make
|
||||
|
||||
CMD ./bandwidthTest --mode=shmoo
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-centos7
|
||||
|
||||
RUN yum install -y \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/1_Utilities/deviceQuery
|
||||
RUN make
|
||||
|
||||
CMD ./deviceQuery
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-centos7
|
||||
|
||||
RUN yum install -y \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/0_Simple/matrixMulCUBLAS
|
||||
RUN make
|
||||
|
||||
CMD ./matrixMulCUBLAS -sizemult=10
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-centos7
|
||||
|
||||
RUN yum install -y \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/5_Simulations/nbody
|
||||
RUN make
|
||||
|
||||
CMD ./nbody -benchmark
|
|
@ -1,3 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-centos7
|
||||
|
||||
CMD nvidia-smi -q
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-centos7
|
||||
|
||||
RUN yum install -y \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/0_Simple/vectorAdd
|
||||
RUN make
|
||||
|
||||
CMD ./vectorAdd
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-ubuntu16.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/1_Utilities/bandwidthTest
|
||||
RUN make
|
||||
|
||||
CMD ./bandwidthTest --mode=shmoo
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-ubuntu16.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/1_Utilities/deviceQuery
|
||||
RUN make
|
||||
|
||||
CMD ./deviceQuery
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-ubuntu16.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/0_Simple/matrixMulCUBLAS
|
||||
RUN make
|
||||
|
||||
CMD ./matrixMulCUBLAS -sizemult=10
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-ubuntu16.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/5_Simulations/nbody
|
||||
RUN make
|
||||
|
||||
CMD ./nbody -benchmark
|
|
@ -1,3 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-ubuntu16.04
|
||||
|
||||
CMD nvidia-smi -q
|
|
@ -1,10 +0,0 @@
|
|||
FROM nvidia/cuda:8.0-devel-ubuntu16.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cuda-samples-$CUDA_PKG_VERSION && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /usr/local/cuda/samples/0_Simple/vectorAdd
|
||||
RUN make
|
||||
|
||||
CMD ./vectorAdd
|
|
@ -1,82 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package cuda
|
||||
|
||||
// #cgo LDFLAGS: -lcudart_static -ldl -lrt
|
||||
// #include <stdlib.h>
|
||||
// #include <cuda_runtime_api.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type handle struct{ dev C.int }
|
||||
|
||||
type deviceProp struct {
|
||||
major int
|
||||
minor int
|
||||
multiProcessorCount uint
|
||||
ECCEnabled bool
|
||||
totalGlobalMem uint
|
||||
sharedMemPerMultiprocessor uint
|
||||
totalConstMem uint
|
||||
l2CacheSize uint
|
||||
memoryClockRate uint
|
||||
memoryBusWidth uint
|
||||
}
|
||||
|
||||
func errorString(ret C.cudaError_t) error {
|
||||
if ret == C.cudaSuccess {
|
||||
return nil
|
||||
}
|
||||
err := C.GoString(C.cudaGetErrorString(ret))
|
||||
return fmt.Errorf("cuda: %v", err)
|
||||
}
|
||||
|
||||
func driverGetVersion() (int, error) {
|
||||
var driver C.int
|
||||
|
||||
r := C.cudaDriverGetVersion(&driver)
|
||||
return int(driver), errorString(r)
|
||||
}
|
||||
|
||||
func deviceGetByPCIBusId(busid string) (handle, error) {
|
||||
var dev C.int
|
||||
|
||||
id := C.CString(busid)
|
||||
r := C.cudaDeviceGetByPCIBusId(&dev, id)
|
||||
C.free(unsafe.Pointer(id))
|
||||
return handle{dev}, errorString(r)
|
||||
}
|
||||
|
||||
func deviceCanAccessPeer(h1, h2 handle) (bool, error) {
|
||||
var ok C.int
|
||||
|
||||
r := C.cudaDeviceCanAccessPeer(&ok, h1.dev, h2.dev)
|
||||
return (ok != 0), errorString(r)
|
||||
}
|
||||
|
||||
func deviceReset() error {
|
||||
return errorString(C.cudaDeviceReset())
|
||||
}
|
||||
|
||||
func (h handle) getDeviceProperties() (*deviceProp, error) {
|
||||
var props C.struct_cudaDeviceProp
|
||||
|
||||
r := C.cudaGetDeviceProperties(&props, h.dev)
|
||||
p := &deviceProp{
|
||||
major: int(props.major),
|
||||
minor: int(props.minor),
|
||||
multiProcessorCount: uint(props.multiProcessorCount),
|
||||
ECCEnabled: bool(props.ECCEnabled != 0),
|
||||
totalGlobalMem: uint(props.totalGlobalMem),
|
||||
sharedMemPerMultiprocessor: uint(props.sharedMemPerMultiprocessor),
|
||||
totalConstMem: uint(props.totalConstMem),
|
||||
l2CacheSize: uint(props.l2CacheSize),
|
||||
memoryClockRate: uint(props.memoryClockRate),
|
||||
memoryBusWidth: uint(props.memoryBusWidth),
|
||||
}
|
||||
return p, errorString(r)
|
||||
}
|
120
src/cuda/cuda.go
120
src/cuda/cuda.go
|
@ -1,120 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package cuda
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type MemoryInfo struct {
|
||||
ECC *bool
|
||||
Global *uint
|
||||
Shared *uint
|
||||
Constant *uint
|
||||
L2Cache *uint
|
||||
Bandwidth *uint
|
||||
}
|
||||
|
||||
type Device struct {
|
||||
handle
|
||||
|
||||
Family *string
|
||||
Arch *string
|
||||
Cores *uint
|
||||
Memory MemoryInfo
|
||||
}
|
||||
|
||||
func archFamily(arch string) *string {
|
||||
m := map[string]string{
|
||||
"1": "Tesla",
|
||||
"2": "Fermi",
|
||||
"3": "Kepler",
|
||||
"5": "Maxwell",
|
||||
"6": "Pascal",
|
||||
}
|
||||
|
||||
f, ok := m[arch[:1]]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return &f
|
||||
}
|
||||
|
||||
func archSMCores(arch string) *uint {
|
||||
m := map[string]uint{
|
||||
"1.0": 8, // Tesla Generation (SM 1.0) G80 class
|
||||
"1.1": 8, // Tesla Generation (SM 1.1) G8x G9x class
|
||||
"1.2": 8, // Tesla Generation (SM 1.2) GT21x class
|
||||
"1.3": 8, // Tesla Generation (SM 1.3) GT20x class
|
||||
"2.0": 32, // Fermi Generation (SM 2.0) GF100 GF110 class
|
||||
"2.1": 48, // Fermi Generation (SM 2.1) GF10x GF11x class
|
||||
"3.0": 192, // Kepler Generation (SM 3.0) GK10x class
|
||||
"3.2": 192, // Kepler Generation (SM 3.2) TK1 class
|
||||
"3.5": 192, // Kepler Generation (SM 3.5) GK11x GK20x class
|
||||
"3.7": 192, // Kepler Generation (SM 3.7) GK21x class
|
||||
"5.0": 128, // Maxwell Generation (SM 5.0) GM10x class
|
||||
"5.2": 128, // Maxwell Generation (SM 5.2) GM20x class
|
||||
"5.3": 128, // Maxwell Generation (SM 5.3) TX1 class
|
||||
"6.0": 64, // Pascal Generation (SM 6.0) GP100 class
|
||||
"6.1": 128, // Pascal Generation (SM 6.1) GP10x class
|
||||
"6.2": 128, // Pascal Generation (SM 6.2) GP10x class
|
||||
}
|
||||
|
||||
c, ok := m[arch]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return &c
|
||||
}
|
||||
|
||||
func GetDriverVersion() (string, error) {
|
||||
d, err := driverGetVersion()
|
||||
return fmt.Sprintf("%d.%d", d/1000, d%100/10), err
|
||||
}
|
||||
|
||||
func NewDevice(busid string) (device *Device, err error) {
|
||||
h, err := deviceGetByPCIBusId(busid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
props, err := h.getDeviceProperties()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arch := fmt.Sprintf("%d.%d", props.major, props.minor)
|
||||
family := archFamily(arch)
|
||||
cores := archSMCores(arch)
|
||||
bw := 2 * (props.memoryClockRate / 1000) * (props.memoryBusWidth / 8)
|
||||
|
||||
// Destroy the active CUDA context
|
||||
if err := deviceReset(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
device = &Device{
|
||||
handle: h,
|
||||
Family: family,
|
||||
Arch: &arch,
|
||||
Cores: cores,
|
||||
Memory: MemoryInfo{
|
||||
ECC: &props.ECCEnabled,
|
||||
Global: &props.totalGlobalMem,
|
||||
Shared: &props.sharedMemPerMultiprocessor,
|
||||
Constant: &props.totalConstMem,
|
||||
L2Cache: &props.l2CacheSize,
|
||||
Bandwidth: &bw, // MB/s
|
||||
},
|
||||
}
|
||||
if cores != nil {
|
||||
*device.Cores *= props.multiProcessorCount
|
||||
}
|
||||
*device.Memory.Global /= 1024 * 1024 // MiB
|
||||
*device.Memory.Shared /= 1024 // KiB
|
||||
*device.Memory.Constant /= 1024 // KiB
|
||||
*device.Memory.L2Cache /= 1024 // KiB
|
||||
return
|
||||
}
|
||||
|
||||
func CanAccessPeer(dev1, dev2 *Device) (bool, error) {
|
||||
return deviceCanAccessPeer(dev1.handle, dev2.handle)
|
||||
}
|
|
@ -1,234 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
var dockerCmd = []string{"docker"}
|
||||
|
||||
func SetCommand(cmd ...string) {
|
||||
if len(cmd) > 0 {
|
||||
dockerCmd = cmd
|
||||
}
|
||||
}
|
||||
|
||||
func docker(stdout bool, command string, arg ...string) (b []byte, err error) {
|
||||
var buf bytes.Buffer
|
||||
|
||||
args := append(append(dockerCmd[1:], command), arg...)
|
||||
cmd := exec.Command(dockerCmd[0], args...)
|
||||
cmd.Stderr = &buf
|
||||
|
||||
if stdout {
|
||||
cmd.Stdout = os.Stderr
|
||||
err = cmd.Run()
|
||||
} else {
|
||||
b, err = cmd.Output()
|
||||
}
|
||||
if err != nil {
|
||||
b = bytes.TrimSpace(buf.Bytes())
|
||||
b = bytes.TrimPrefix(b, []byte("Error: "))
|
||||
if len(b) > 0 {
|
||||
return nil, fmt.Errorf("%s", b)
|
||||
} else {
|
||||
return nil, fmt.Errorf("failed to run docker command")
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// List of boolean options: https://github.com/docker/docker/blob/17.03.x/contrib/completion/bash/docker
|
||||
var lastSupportedVersion = "17.03"
|
||||
var booleanFlags = map[string]map[string][]string{
|
||||
"1.9": {
|
||||
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
|
||||
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
|
||||
"-disable-legacy-registry", "-help", "-icc", "-ip-forward",
|
||||
"-ip-masq", "-iptables", "-ipv6", "-selinux-enabled", "-userland-proxy"},
|
||||
"create": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
|
||||
"run": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
|
||||
"-detach", "d", "-rm", "-sig-proxy"},
|
||||
},
|
||||
"1.10": {
|
||||
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
|
||||
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
|
||||
"-disable-legacy-registry", "-help", "-icc", "-ip-forward",
|
||||
"-ip-masq", "-iptables", "-ipv6", "-selinux-enabled", "-userland-proxy"},
|
||||
"create": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
|
||||
"run": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
|
||||
"-detach", "d", "-rm", "-sig-proxy"},
|
||||
},
|
||||
"1.11": {
|
||||
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
|
||||
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
|
||||
"-disable-legacy-registry", "-help", "-icc", "-ip-forward",
|
||||
"-ip-masq", "-iptables", "-ipv6", "-raw-logs", "-selinux-enabled", "-userland-proxy"},
|
||||
"create": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
|
||||
"run": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
|
||||
"-detach", "d", "-rm", "-sig-proxy"},
|
||||
},
|
||||
"1.12": {
|
||||
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
|
||||
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
|
||||
"-disable-legacy-registry", "-help", "-icc", "-ip-forward",
|
||||
"-ip-masq", "-iptables", "-ipv6", "-live-restore", "-raw-logs",
|
||||
"-selinux-enabled", "-userland-proxy"},
|
||||
"create": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
|
||||
"run": []string{"-disable-content-trust", "-help", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
|
||||
"-detach", "d", "-no-healthcheck", "-rm", "-sig-proxy"},
|
||||
},
|
||||
"1.13": {
|
||||
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
|
||||
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
|
||||
"-disable-legacy-registry", "-experimental", "-help", "-icc", "-init", "-ip-forward",
|
||||
"-ip-masq", "-iptables", "-ipv6", "-live-restore", "-raw-logs",
|
||||
"-selinux-enabled", "-userland-proxy"},
|
||||
"create": []string{"-disable-content-trust", "-help", "-init", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
|
||||
"run": []string{"-disable-content-trust", "-help", "-init", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
|
||||
"-detach", "d", "-no-healthcheck", "-rm", "-sig-proxy"},
|
||||
},
|
||||
lastSupportedVersion: {
|
||||
"": []string{"-debug", "D", "-tls", "-tlsverify"}, // global options
|
||||
"daemon": []string{"-debug", "D", "-tls", "-tlsverify", // global options
|
||||
"-disable-legacy-registry", "-experimental", "-help", "-icc", "-init", "-ip-forward",
|
||||
"-ip-masq", "-iptables", "-ipv6", "-live-restore", "-raw-logs",
|
||||
"-selinux-enabled", "-userland-proxy"},
|
||||
"create": []string{"-disable-content-trust", "-help", "-init", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t"},
|
||||
"run": []string{"-disable-content-trust", "-help", "-init", "-interactive", "i", "-oom-kill-disable",
|
||||
"-privileged", "-publish-all", "P", "-read-only", "-tty", "t", // same as "create"
|
||||
"-detach", "d", "-no-healthcheck", "-rm", "-sig-proxy"},
|
||||
},
|
||||
}
|
||||
|
||||
func ParseArgs(args []string, cmd ...string) (string, int, error) {
|
||||
if len(cmd) == 0 {
|
||||
cmd = append(cmd, "")
|
||||
}
|
||||
version, err := ClientVersion()
|
||||
if err != nil {
|
||||
return "", -1, err
|
||||
}
|
||||
vmaj := version[:strings.LastIndex(version, ".")]
|
||||
|
||||
cmdBooleanFlags, ok := booleanFlags[vmaj][cmd[0]]
|
||||
if !ok {
|
||||
// Docker is newer than supported version: use flags from last version we know.
|
||||
cmdBooleanFlags, _ = booleanFlags[lastSupportedVersion][cmd[0]]
|
||||
}
|
||||
|
||||
// Build the set of boolean Docker options for this command
|
||||
type void struct{}
|
||||
flags := make(map[string]void)
|
||||
for _, f := range cmdBooleanFlags {
|
||||
flags[f] = void{}
|
||||
}
|
||||
|
||||
for i := 0; i < len(args); i++ {
|
||||
arg := args[i]
|
||||
if arg[0] != '-' || arg == "-" {
|
||||
return args[i], i, nil
|
||||
}
|
||||
// Skip if current flag is in the form --option=value
|
||||
// Note: doesn't handle weird commands like `nvidia-docker run -vit=XYZ /tmp:/bar ubuntu`
|
||||
if strings.Contains(arg, "=") {
|
||||
continue
|
||||
}
|
||||
|
||||
arg = arg[1:]
|
||||
if arg[0] == '-' {
|
||||
// Long option: skip next argument if option is not boolean
|
||||
if _, ok := flags[arg]; !ok {
|
||||
i++
|
||||
}
|
||||
} else {
|
||||
// Short options: skip next argument if any option is not boolean
|
||||
for _, f := range arg {
|
||||
if _, ok := flags[string(f)]; !ok {
|
||||
i++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", -1, nil
|
||||
}
|
||||
|
||||
func Label(image, label string) (string, error) {
|
||||
format := fmt.Sprintf(`--format={{index .Config.Labels "%s"}}`, label)
|
||||
|
||||
b, err := docker(false, "inspect", format, image)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(bytes.Trim(b, " \n")), nil
|
||||
}
|
||||
|
||||
func VolumeInspect(name string) (string, error) {
|
||||
var vol []struct{ Name, Driver, Mountpoint string }
|
||||
|
||||
b, err := docker(false, "volume", "inspect", name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := json.Unmarshal(b, &vol); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return vol[0].Mountpoint, nil
|
||||
}
|
||||
|
||||
func ImageExists(image string) (bool, error) {
|
||||
_, err := docker(false, "inspect", "--type=image", image)
|
||||
if err != nil {
|
||||
// We can't know whether the image was missing or if the daemon was unreachable.
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func ImagePull(image string) error {
|
||||
_, err := docker(true, "pull", image)
|
||||
return err
|
||||
}
|
||||
|
||||
func ClientVersion() (string, error) {
|
||||
b, err := docker(false, "version", "--format", "{{.Client.Version}}")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
version := string(b)
|
||||
var v1, v2, v3 int
|
||||
if _, err := fmt.Sscanf(version, "%d.%d.%d", &v1, &v2, &v3); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func Docker(arg ...string) error {
|
||||
cmd, err := exec.LookPath(dockerCmd[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
args := append(dockerCmd, arg...)
|
||||
|
||||
return syscall.Exec(cmd, args, os.Environ())
|
||||
}
|
|
@ -1,104 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package graceful
|
||||
|
||||
import (
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
middleware "github.com/justinas/alice"
|
||||
"gopkg.in/tylerb/graceful.v1"
|
||||
)
|
||||
|
||||
const timeout = 5 * time.Second
|
||||
|
||||
type HTTPServer struct {
|
||||
sync.Mutex
|
||||
|
||||
network string
|
||||
router *http.ServeMux
|
||||
server *graceful.Server
|
||||
err error
|
||||
}
|
||||
|
||||
func recovery(handler http.Handler) http.Handler {
|
||||
f := func(w http.ResponseWriter, r *http.Request) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
http.Error(w, "internal error, check logs for details", http.StatusInternalServerError)
|
||||
}
|
||||
}()
|
||||
handler.ServeHTTP(w, r)
|
||||
}
|
||||
return http.HandlerFunc(f)
|
||||
}
|
||||
|
||||
func NewHTTPServer(net, addr string, mw ...middleware.Constructor) *HTTPServer {
|
||||
r := http.NewServeMux()
|
||||
|
||||
return &HTTPServer{
|
||||
network: net,
|
||||
router: r,
|
||||
server: &graceful.Server{
|
||||
Timeout: timeout,
|
||||
Server: &http.Server{
|
||||
Addr: addr,
|
||||
Handler: middleware.New(recovery).Append(mw...).Then(r),
|
||||
ReadTimeout: timeout,
|
||||
WriteTimeout: timeout,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *HTTPServer) Handle(method, route string, handler http.HandlerFunc) {
|
||||
f := func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != method {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
handler.ServeHTTP(w, r)
|
||||
}
|
||||
s.router.HandleFunc(route, f)
|
||||
}
|
||||
|
||||
func (s *HTTPServer) Serve() <-chan struct{} {
|
||||
if s.network == "unix" {
|
||||
os.Remove(s.server.Addr)
|
||||
}
|
||||
l, err := net.Listen(s.network, s.server.Addr)
|
||||
if err != nil {
|
||||
s.Lock()
|
||||
s.err = err
|
||||
s.Unlock()
|
||||
c := make(chan struct{})
|
||||
close(c)
|
||||
return c
|
||||
}
|
||||
|
||||
c := s.server.StopChan()
|
||||
go func() {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
err = s.server.Serve(l)
|
||||
if e, ok := err.(*net.OpError); !ok || (ok && e.Op != "accept") {
|
||||
s.err = err
|
||||
}
|
||||
}()
|
||||
return c
|
||||
}
|
||||
|
||||
func (s *HTTPServer) Stop() {
|
||||
s.server.Stop(timeout)
|
||||
}
|
||||
|
||||
func (s *HTTPServer) Error() error {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
return s.err
|
||||
}
|
|
@ -1,196 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const ldcachePath = "/etc/ld.so.cache"
|
||||
|
||||
const (
|
||||
magicString1 = "ld.so-1.7.0"
|
||||
magicString2 = "glibc-ld.so.cache"
|
||||
magicVersion = "1.1"
|
||||
)
|
||||
|
||||
const (
|
||||
flagTypeMask = 0x00ff
|
||||
flagTypeELF = 0x0001
|
||||
|
||||
flagArchMask = 0xff00
|
||||
flagArchI386 = 0x0000
|
||||
flagArchX8664 = 0x0300
|
||||
flagArchX32 = 0x0800
|
||||
flagArchPpc64le = 0x0500
|
||||
)
|
||||
|
||||
var ErrInvalidCache = errors.New("invalid ld.so.cache file")
|
||||
|
||||
type Header1 struct {
|
||||
Magic [len(magicString1) + 1]byte // include null delimiter
|
||||
NLibs uint32
|
||||
}
|
||||
|
||||
type Entry1 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
}
|
||||
|
||||
type Header2 struct {
|
||||
Magic [len(magicString2)]byte
|
||||
Version [len(magicVersion)]byte
|
||||
NLibs uint32
|
||||
TableSize uint32
|
||||
_ [3]uint32 // unused
|
||||
_ uint64 // force 8 byte alignment
|
||||
}
|
||||
|
||||
type Entry2 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
OSVersion uint32
|
||||
HWCap uint64
|
||||
}
|
||||
|
||||
type LDCache struct {
|
||||
*bytes.Reader
|
||||
|
||||
data, libs []byte
|
||||
header Header2
|
||||
entries []Entry2
|
||||
}
|
||||
|
||||
func Open() (*LDCache, error) {
|
||||
f, err := os.Open(ldcachePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d, err := syscall.Mmap(int(f.Fd()), 0, int(fi.Size()),
|
||||
syscall.PROT_READ, syscall.MAP_PRIVATE)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cache := &LDCache{data: d, Reader: bytes.NewReader(d)}
|
||||
return cache, cache.parse()
|
||||
}
|
||||
|
||||
func (c *LDCache) Close() error {
|
||||
return syscall.Munmap(c.data)
|
||||
}
|
||||
|
||||
func (c *LDCache) Magic() string {
|
||||
return string(c.header.Magic[:])
|
||||
}
|
||||
|
||||
func (c *LDCache) Version() string {
|
||||
return string(c.header.Version[:])
|
||||
}
|
||||
|
||||
func strn(b []byte, n int) string {
|
||||
return string(b[:n])
|
||||
}
|
||||
|
||||
func (c *LDCache) parse() error {
|
||||
var header Header1
|
||||
|
||||
// Check for the old format (< glibc-2.2)
|
||||
if c.Len() <= int(unsafe.Sizeof(header)) {
|
||||
return ErrInvalidCache
|
||||
}
|
||||
if strn(c.data, len(magicString1)) == magicString1 {
|
||||
if err := binary.Read(c, binary.LittleEndian, &header); err != nil {
|
||||
return err
|
||||
}
|
||||
n := int64(header.NLibs) * int64(unsafe.Sizeof(Entry1{}))
|
||||
offset, err := c.Seek(n, 1) // skip old entries
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n = (-offset) & int64(unsafe.Alignof(c.header)-1)
|
||||
_, err = c.Seek(n, 1) // skip padding
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
c.libs = c.data[c.Size()-int64(c.Len()):] // kv offsets start here
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.header); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Magic() != magicString2 || c.Version() != magicVersion {
|
||||
return ErrInvalidCache
|
||||
}
|
||||
c.entries = make([]Entry2, c.header.NLibs)
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.entries); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *LDCache) Lookup(libs ...string) (paths32, paths64 []string) {
|
||||
type void struct{}
|
||||
var paths *[]string
|
||||
|
||||
set := make(map[string]void)
|
||||
prefix := make([][]byte, len(libs))
|
||||
|
||||
for i := range libs {
|
||||
prefix[i] = []byte(libs[i])
|
||||
}
|
||||
for _, e := range c.entries {
|
||||
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
|
||||
continue
|
||||
}
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
paths = &paths64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
paths = &paths32
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
|
||||
continue
|
||||
}
|
||||
lib := c.libs[e.Key:]
|
||||
value := c.libs[e.Value:]
|
||||
|
||||
for _, p := range prefix {
|
||||
if bytes.HasPrefix(lib, p) {
|
||||
n := bytes.IndexByte(value, 0)
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
path, err := filepath.EvalSymlinks(strn(value, n))
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
if _, ok := set[path]; ok {
|
||||
break
|
||||
}
|
||||
set[path] = void{}
|
||||
*paths = append(*paths, path)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
|
@ -1,110 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/nvidia"
|
||||
)
|
||||
|
||||
var (
|
||||
PrintVersion bool
|
||||
ListenAddr string
|
||||
VolumesPath string
|
||||
SocketPath string
|
||||
|
||||
Version string
|
||||
Devices []nvidia.Device
|
||||
Volumes nvidia.VolumeMap
|
||||
)
|
||||
|
||||
func init() {
|
||||
log.SetPrefix(os.Args[0] + " | ")
|
||||
|
||||
flag.BoolVar(&PrintVersion, "v", false, "Show the plugin version information")
|
||||
flag.StringVar(&ListenAddr, "l", "localhost:3476", "Server listen address")
|
||||
flag.StringVar(&VolumesPath, "d", "/var/lib/nvidia-docker/volumes", "Path where to store the volumes")
|
||||
flag.StringVar(&SocketPath, "s", "/run/docker/plugins", "Path to the plugin socket")
|
||||
}
|
||||
|
||||
func assert(err error) {
|
||||
if err != nil {
|
||||
log.Panicln("Error:", err)
|
||||
}
|
||||
}
|
||||
|
||||
func exit() {
|
||||
if err := recover(); err != nil {
|
||||
if _, ok := err.(runtime.Error); ok {
|
||||
log.Println(err)
|
||||
}
|
||||
if os.Getenv("NV_DEBUG") != "" {
|
||||
log.Printf("%s", debug.Stack())
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func main() {
|
||||
var err error
|
||||
|
||||
flag.Parse()
|
||||
defer exit()
|
||||
|
||||
if PrintVersion {
|
||||
fmt.Printf("NVIDIA Docker plugin: %s\n", Version)
|
||||
return
|
||||
}
|
||||
|
||||
log.Println("Loading NVIDIA unified memory")
|
||||
assert(nvidia.LoadUVM())
|
||||
|
||||
log.Println("Loading NVIDIA management library")
|
||||
assert(nvidia.Init())
|
||||
defer func() { assert(nvidia.Shutdown()) }()
|
||||
|
||||
log.Println("Discovering GPU devices")
|
||||
Devices, err = nvidia.LookupDevices()
|
||||
assert(err)
|
||||
|
||||
log.Println("Provisioning volumes at", VolumesPath)
|
||||
Volumes, err = nvidia.LookupVolumes(VolumesPath)
|
||||
assert(err)
|
||||
|
||||
plugin := NewPluginAPI(SocketPath)
|
||||
remote := NewRemoteAPI(ListenAddr)
|
||||
|
||||
log.Println("Serving plugin API at", SocketPath)
|
||||
log.Println("Serving remote API at", ListenAddr)
|
||||
p := plugin.Serve()
|
||||
r := remote.Serve()
|
||||
|
||||
join, joined := make(chan int, 2), 0
|
||||
L:
|
||||
for {
|
||||
select {
|
||||
case <-p:
|
||||
remote.Stop()
|
||||
p = nil
|
||||
join <- 1
|
||||
case <-r:
|
||||
plugin.Stop()
|
||||
r = nil
|
||||
join <- 1
|
||||
case j := <-join:
|
||||
if joined += j; joined == cap(join) {
|
||||
break L
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(plugin.Error())
|
||||
assert(remote.Error())
|
||||
log.Println("Successfully terminated")
|
||||
}
|
|
@ -1,75 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/graceful"
|
||||
"github.com/NVIDIA/nvidia-docker/src/nvidia"
|
||||
)
|
||||
|
||||
const socketName = nvidia.DockerPlugin + ".sock"
|
||||
|
||||
type plugin interface {
|
||||
implement() string
|
||||
register(*PluginAPI)
|
||||
}
|
||||
|
||||
type PluginAPI struct {
|
||||
*graceful.HTTPServer
|
||||
|
||||
plugins []plugin
|
||||
}
|
||||
|
||||
func accept(handler http.Handler) http.Handler {
|
||||
f := func(w http.ResponseWriter, r *http.Request) {
|
||||
h := r.Header.Get("Accept")
|
||||
if h != "application/vnd.docker.plugins.v1.1+json" &&
|
||||
h != "application/vnd.docker.plugins.v1.2+json" {
|
||||
log.Println("Unsupported plugin API", h)
|
||||
w.WriteHeader(http.StatusNotAcceptable)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
|
||||
handler.ServeHTTP(w, r)
|
||||
}
|
||||
return http.HandlerFunc(f)
|
||||
}
|
||||
|
||||
func NewPluginAPI(prefix string) *PluginAPI {
|
||||
os.MkdirAll(prefix, 0700)
|
||||
|
||||
a := &PluginAPI{
|
||||
HTTPServer: graceful.NewHTTPServer("unix", path.Join(prefix, socketName), accept),
|
||||
}
|
||||
a.Handle("POST", "/Plugin.Activate", a.activate)
|
||||
|
||||
a.register(
|
||||
new(pluginVolume),
|
||||
)
|
||||
return a
|
||||
}
|
||||
|
||||
func (a *PluginAPI) register(plugins ...plugin) {
|
||||
for _, p := range plugins {
|
||||
p.register(a)
|
||||
a.plugins = append(a.plugins, p)
|
||||
}
|
||||
}
|
||||
|
||||
func (a *PluginAPI) activate(resp http.ResponseWriter, req *http.Request) {
|
||||
r := struct{ Implements []string }{}
|
||||
|
||||
log.Println("Received activate request")
|
||||
r.Implements = make([]string, len(a.plugins))
|
||||
for i, p := range a.plugins {
|
||||
r.Implements[i] = p.implement()
|
||||
}
|
||||
assert(json.NewEncoder(resp).Encode(r))
|
||||
log.Println("Plugins activated", r.Implements)
|
||||
}
|
|
@ -1,207 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"path"
|
||||
"regexp"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/nvidia"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrVolumeBadFormat = errors.New("bad volume format")
|
||||
ErrVolumeUnsupported = errors.New("unsupported volume")
|
||||
ErrVolumeNotFound = errors.New("no such volume")
|
||||
ErrVolumeVersion = errors.New("invalid volume version")
|
||||
)
|
||||
|
||||
type pluginVolume struct{}
|
||||
|
||||
func (p *pluginVolume) implement() string { return "VolumeDriver" }
|
||||
|
||||
func (p *pluginVolume) register(api *PluginAPI) {
|
||||
prefix := "/" + p.implement()
|
||||
|
||||
api.Handle("POST", prefix+".Create", p.create)
|
||||
api.Handle("POST", prefix+".Remove", p.remove)
|
||||
api.Handle("POST", prefix+".Mount", p.mount)
|
||||
api.Handle("POST", prefix+".Unmount", p.unmount)
|
||||
api.Handle("POST", prefix+".Path", p.path)
|
||||
api.Handle("POST", prefix+".Get", p.get)
|
||||
api.Handle("POST", prefix+".List", p.list)
|
||||
api.Handle("POST", prefix+".Capabilities", p.capabilities)
|
||||
}
|
||||
|
||||
func fmtError(err error, vol string) *string {
|
||||
s := fmt.Sprintf("%v: %s", err, vol)
|
||||
return &s
|
||||
}
|
||||
|
||||
func getVolume(name string) (*nvidia.Volume, string, error) {
|
||||
re := regexp.MustCompile("^([a-zA-Z0-9_.-]+)_([0-9.]+)$")
|
||||
m := re.FindStringSubmatch(name)
|
||||
if len(m) != 3 {
|
||||
return nil, "", ErrVolumeBadFormat
|
||||
}
|
||||
volume, version := Volumes[m[1]], m[2]
|
||||
if volume == nil {
|
||||
return nil, "", ErrVolumeUnsupported
|
||||
}
|
||||
return volume, version, nil
|
||||
}
|
||||
|
||||
// create handles /VolumeDriver.Create: validates the requested volume
// name/version and materializes the volume on disk if not already
// present. Errors are reported in the JSON reply, not as HTTP errors.
func (p *pluginVolume) create(resp http.ResponseWriter, req *http.Request) {
	var q struct{ Name string }
	var r struct{ Err *string }

	assert(json.NewDecoder(req.Body).Decode(&q))
	log.Printf("Received create request for volume '%s'\n", q.Name)

	volume, version, err := getVolume(q.Name)
	if err != nil {
		r.Err = fmtError(err, q.Name)
		assert(json.NewEncoder(resp).Encode(r))
		return
	}
	// The volume version requested needs to match the volume version in cache
	if version != volume.Version {
		r.Err = fmtError(ErrVolumeVersion, q.Name)
		assert(json.NewEncoder(resp).Encode(r))
		return
	}
	ok, err := volume.Exists()
	assert(err)
	if !ok {
		// LinkStrategy populates the volume with hard links to host files.
		assert(volume.Create(nvidia.LinkStrategy{}))
	}
	assert(json.NewEncoder(resp).Encode(r))
}
|
||||
|
||||
// remove handles /VolumeDriver.Remove: deletes the requested version of
// the volume from disk. Name resolution errors go into the JSON reply;
// removal failures abort the handler via assert.
func (p *pluginVolume) remove(resp http.ResponseWriter, req *http.Request) {
	var q struct{ Name string }
	var r struct{ Err *string }

	assert(json.NewDecoder(req.Body).Decode(&q))
	log.Printf("Received remove request for volume '%s'\n", q.Name)

	volume, version, err := getVolume(q.Name)
	if err != nil {
		r.Err = fmtError(err, q.Name)
	} else {
		assert(volume.Remove(version))
	}
	assert(json.NewEncoder(resp).Encode(r))
}
|
||||
|
||||
// mount handles /VolumeDriver.Mount (and /VolumeDriver.Path, which
// delegates here): resolves the volume and replies with its host
// mountpoint. The content must already exist on disk (see create).
func (p *pluginVolume) mount(resp http.ResponseWriter, req *http.Request) {
	var q struct{ Name string }
	var r struct{ Mountpoint, Err *string }

	assert(json.NewDecoder(req.Body).Decode(&q))
	log.Printf("Received mount request for volume '%s'\n", q.Name)

	volume, version, err := getVolume(q.Name)
	if err != nil {
		r.Err = fmtError(err, q.Name)
		assert(json.NewEncoder(resp).Encode(r))
		return
	}
	ok, err := volume.Exists(version)
	assert(err)
	if !ok {
		r.Err = fmtError(ErrVolumeNotFound, q.Name)
	} else {
		// Note: this "p" intentionally shadows the method receiver here.
		p := path.Join(volume.Path, version)
		r.Mountpoint = &p
	}
	assert(json.NewEncoder(resp).Encode(r))
}
|
||||
|
||||
func (p *pluginVolume) unmount(resp http.ResponseWriter, req *http.Request) {
|
||||
var q struct{ Name string }
|
||||
var r struct{ Err *string }
|
||||
|
||||
assert(json.NewDecoder(req.Body).Decode(&q))
|
||||
log.Printf("Received unmount request for volume '%s'\n", q.Name)
|
||||
|
||||
_, _, err := getVolume(q.Name)
|
||||
if err != nil {
|
||||
r.Err = fmtError(err, q.Name)
|
||||
}
|
||||
assert(json.NewEncoder(resp).Encode(r))
|
||||
}
|
||||
|
||||
// path handles /VolumeDriver.Path by delegating to mount, which performs
// the same lookup and returns the same Mountpoint payload.
func (p *pluginVolume) path(resp http.ResponseWriter, req *http.Request) {
	p.mount(resp, req)
}
|
||||
|
||||
// get handles /VolumeDriver.Get: returns the volume's name and host
// mountpoint if the requested version exists on disk.
func (p *pluginVolume) get(resp http.ResponseWriter, req *http.Request) {
	// Volume mirrors the shape Docker expects in the "Volume" reply field.
	type Volume struct{ Name, Mountpoint string }

	var q struct{ Name string }
	var r struct {
		Volume *Volume
		Err    *string
	}

	assert(json.NewDecoder(req.Body).Decode(&q))

	volume, version, err := getVolume(q.Name)
	if err != nil {
		r.Err = fmtError(err, q.Name)
		assert(json.NewEncoder(resp).Encode(r))
		return
	}
	ok, err := volume.Exists(version)
	assert(err)
	if !ok {
		r.Err = fmtError(ErrVolumeNotFound, q.Name)
	} else {
		r.Volume = &Volume{
			Name:       q.Name,
			Mountpoint: path.Join(volume.Path, version),
		}
	}
	assert(json.NewEncoder(resp).Encode(r))
}
|
||||
|
||||
// list handles /VolumeDriver.List: enumerates every version of every
// known volume present on disk, using the "<name>_<version>" naming
// convention that getVolume parses on the way in.
func (p *pluginVolume) list(resp http.ResponseWriter, req *http.Request) {
	type Volume struct{ Name, Mountpoint string }

	var r struct {
		Volumes []Volume
		Err     *string
	}

	for _, vol := range Volumes {
		versions, err := vol.ListVersions()
		assert(err)
		for _, v := range versions {
			r.Volumes = append(r.Volumes, Volume{
				Name:       fmt.Sprintf("%s_%s", vol.Name, v),
				Mountpoint: path.Join(vol.Path, v),
			})
		}
	}

	assert(json.NewEncoder(resp).Encode(r))
}
|
||||
|
||||
func (p *pluginVolume) capabilities(resp http.ResponseWriter, req *http.Request) {
|
||||
type Capabilities struct{ Scope string }
|
||||
var r struct {
|
||||
Capabilities Capabilities
|
||||
}
|
||||
|
||||
r.Capabilities = Capabilities{
|
||||
Scope: "local",
|
||||
}
|
||||
|
||||
assert(json.NewEncoder(resp).Encode(r))
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/graceful"
|
||||
)
|
||||
|
||||
// restapi is the set of handlers a versioned REST API must provide.
// Each implementation (e.g. remoteV10) exposes GPU information/status
// and CLI-argument generation endpoints under its version prefix.
type restapi interface {
	version() string

	gpuInfo(http.ResponseWriter, *http.Request)
	gpuInfoJSON(http.ResponseWriter, *http.Request)
	gpuStatus(http.ResponseWriter, *http.Request)
	gpuStatusJSON(http.ResponseWriter, *http.Request)
	dockerCLI(http.ResponseWriter, *http.Request)
	dockerCLIJSON(http.ResponseWriter, *http.Request)
	mesosCLI(http.ResponseWriter, *http.Request)
}

// RemoteAPI aggregates every registered restapi version behind a single
// gracefully-stoppable HTTP server.
type RemoteAPI struct {
	*graceful.HTTPServer

	apis []restapi // registered API versions, in registration order
}
|
||||
|
||||
// NewRemoteAPI builds a RemoteAPI listening on the given TCP address and
// registers all supported API versions (currently only v1.0).
func NewRemoteAPI(addr string) *RemoteAPI {
	a := &RemoteAPI{
		HTTPServer: graceful.NewHTTPServer("tcp", addr),
	}
	a.register(
		new(remoteV10),
	)
	return a
}
|
||||
|
||||
// register mounts each API's handlers under its version prefix
// (e.g. /v1.0/gpu/info). The last API in the list is additionally
// mounted at the root: for the final element the goto re-runs the
// Handle calls once more with an empty prefix, making the newest
// version the default.
func (a *RemoteAPI) register(apis ...restapi) {
	for i, api := range apis {
		prefix := "/" + api.version()

	handlers:
		a.Handle("GET", prefix+"/gpu/info", api.gpuInfo)
		a.Handle("GET", prefix+"/gpu/info/json", api.gpuInfoJSON)
		a.Handle("GET", prefix+"/gpu/status", api.gpuStatus)
		a.Handle("GET", prefix+"/gpu/status/json", api.gpuStatusJSON)
		a.Handle("GET", prefix+"/docker/cli", api.dockerCLI)
		a.Handle("GET", prefix+"/docker/cli/json", api.dockerCLIJSON)
		a.Handle("GET", prefix+"/mesos/cli", api.mesosCLI)

		// Second pass for the latest version only (prefix != "" guards
		// against looping forever).
		if i == len(apis)-1 && prefix != "" {
			prefix = ""
			goto handlers
		}
		a.apis = append(a.apis, api)
	}
}
|
|
@ -1,283 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/zlib"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"text/tabwriter"
|
||||
"text/template"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/nvidia"
|
||||
)
|
||||
|
||||
// remoteV10 implements the restapi interface for API version 1.0.
type remoteV10 struct{}

// version returns the URL prefix component for this API version.
func (r *remoteV10) version() string { return "v1.0" }
|
||||
|
||||
// gpuInfo renders a human-readable report of every device's static
// properties, aligned via tabwriter. Template helpers expose the driver
// and supported CUDA versions.
func (r *remoteV10) gpuInfo(resp http.ResponseWriter, req *http.Request) {
	const tpl = `
Driver version: {{driverVersion}}
Supported CUDA version: {{cudaVersion}}
{{range $i, $e := .}}
Device #{{$i}}
Model: {{or .Model "N/A"}}
UUID: {{.UUID}}
Path: {{.Path}}
Family: {{or .Family "N/A"}}
Arch: {{or .Arch "N/A"}}
Cores: {{or .Cores "N/A"}}
Power: {{if .Power}}{{.Power}} W{{else}}N/A{{end}}
CPU Affinity: {{if .CPUAffinity}}NUMA node{{.CPUAffinity}}{{else}}N/A{{end}}
PCI
Bus ID: {{.PCI.BusID}}
BAR1: {{if .PCI.BAR1}}{{.PCI.BAR1}} MiB{{else}}N/A{{end}}
Bandwidth: {{if .PCI.Bandwidth}}{{.PCI.Bandwidth}} MB/s{{else}}N/A{{end}}
Memory
ECC: {{or .Memory.ECC "N/A"}}
Global: {{if .Memory.Global}}{{.Memory.Global}} MiB{{else}}N/A{{end}}
Constant: {{if .Memory.Constant}}{{.Memory.Constant}} KiB{{else}}N/A{{end}}
Shared: {{if .Memory.Shared}}{{.Memory.Shared}} KiB{{else}}N/A{{end}}
L2 Cache: {{if .Memory.L2Cache}}{{.Memory.L2Cache}} KiB{{else}}N/A{{end}}
Bandwidth: {{if .Memory.Bandwidth}}{{.Memory.Bandwidth}} MB/s{{else}}N/A{{end}}
Clocks
Cores: {{if .Clocks.Cores}}{{.Clocks.Cores}} MHz{{else}}N/A{{end}}
Memory: {{if .Clocks.Memory}}{{.Clocks.Memory}} MHz{{else}}N/A{{end}}
P2P Available{{if not .Topology}}: None{{else}}{{range .Topology}}
{{.BusID}} - {{(.Link.String)}}{{end}}{{end}}
{{end}}
`
	m := template.FuncMap{
		"driverVersion": nvidia.GetDriverVersion,
		"cudaVersion":   nvidia.GetCUDAVersion,
	}
	t := template.Must(template.New("").Funcs(m).Parse(tpl))
	// tabwriter aligns the "label: value" columns of the template output.
	w := tabwriter.NewWriter(resp, 0, 4, 0, ' ', 0)

	assert(t.Execute(w, Devices))
	assert(w.Flush())
}
|
||||
|
||||
func (r *remoteV10) gpuInfoJSON(resp http.ResponseWriter, req *http.Request) {
|
||||
var body bytes.Buffer
|
||||
|
||||
writeGPUInfoJSON(&body)
|
||||
resp.Header().Set("Content-Type", "application/json")
|
||||
_, err := body.WriteTo(resp)
|
||||
assert(err)
|
||||
}
|
||||
|
||||
// writeGPUInfoJSON encodes the driver/CUDA versions plus the static
// device list as JSON into wr. Shared by the HTTP handler and the Mesos
// attribute generator (mesosCLI).
func writeGPUInfoJSON(wr io.Writer) {
	var err error

	r := struct {
		Version struct{ Driver, CUDA string }
		Devices []nvidia.Device
	}{
		Devices: Devices,
	}
	r.Version.Driver, err = nvidia.GetDriverVersion()
	assert(err)
	r.Version.CUDA, err = nvidia.GetCUDAVersion()
	assert(err)

	assert(json.NewEncoder(wr).Encode(r))
}
|
||||
|
||||
// gpuStatus renders a human-readable report of each device's dynamic
// status (power, temperature, utilization, memory, clocks, processes),
// aligned via tabwriter. $s holds the per-device Status() result.
func (r *remoteV10) gpuStatus(resp http.ResponseWriter, req *http.Request) {
	const tpl = `{{range $i, $e := .}}{{$s := (.Status)}}
Device #{{$i}}
Power: {{if and $s.Power .Power}}{{$s.Power}} / {{.Power}} W{{else}}N/A{{end}}
Temperature: {{if $s.Temperature}}{{$s.Temperature}} °C{{else}}N/A{{end}}
Utilization
GPU: {{if $s.Utilization.GPU}}{{$s.Utilization.GPU}} %{{else}}N/A{{end}}
Memory: {{if $s.Utilization.Memory}}{{$s.Utilization.Memory}} %{{else}}N/A{{end}}
Encoder: {{if $s.Utilization.Encoder}}{{$s.Utilization.Encoder}} %{{else}}N/A{{end}}
Decoder: {{if $s.Utilization.Decoder}}{{$s.Utilization.Decoder}} %{{else}}N/A{{end}}
Memory
Global: {{if and $s.Memory.GlobalUsed .Memory.Global}}{{$s.Memory.GlobalUsed}} / {{.Memory.Global}} MiB{{else}}N/A{{end}}
ECC Errors
L1 Cache: {{or $s.Memory.ECCErrors.L1Cache "N/A"}}
L2 Cache: {{or $s.Memory.ECCErrors.L2Cache "N/A"}}
Global: {{or $s.Memory.ECCErrors.Global "N/A"}}
PCI
BAR1: {{if and $s.PCI.BAR1Used .PCI.BAR1}}{{$s.PCI.BAR1Used}} / {{.PCI.BAR1}} MiB{{else}}N/A{{end}}
Throughput
RX: {{if $s.PCI.Throughput.RX}}{{$s.PCI.Throughput.RX}} MB/s{{else}}N/A{{end}}
TX: {{if $s.PCI.Throughput.TX}}{{$s.PCI.Throughput.TX}} MB/s{{else}}N/A{{end}}
Clocks
Cores: {{if $s.Clocks.Cores}}{{$s.Clocks.Cores}} MHz{{else}}N/A{{end}}
Memory: {{if $s.Clocks.Memory}}{{$s.Clocks.Memory}} MHz{{else}}N/A{{end}}
Processes{{if not $s.Processes}}: None{{else}}{{range $s.Processes}}
- PID: {{.PID}}
Name: {{.Name}}
Memory: {{.MemoryUsed}} MiB{{end}}{{end}}
{{end}}
`
	t := template.Must(template.New("").Parse(tpl))
	w := tabwriter.NewWriter(resp, 0, 4, 0, ' ', 0)

	assert(t.Execute(w, Devices))
	assert(w.Flush())
}
|
||||
|
||||
func (r *remoteV10) gpuStatusJSON(resp http.ResponseWriter, req *http.Request) {
|
||||
var body bytes.Buffer
|
||||
|
||||
writeGPUStatusJSON(&body)
|
||||
resp.Header().Set("Content-Type", "application/json")
|
||||
_, err := body.WriteTo(resp)
|
||||
assert(err)
|
||||
}
|
||||
|
||||
// writeGPUStatusJSON queries the current status of every device and
// encodes the collection as JSON into wr.
func writeGPUStatusJSON(wr io.Writer) {
	status := make([]*nvidia.DeviceStatus, 0, len(Devices))

	for i := range Devices {
		s, err := Devices[i].Status()
		assert(err)
		status = append(status, s)
	}
	r := struct{ Devices []*nvidia.DeviceStatus }{status}
	assert(json.NewEncoder(wr).Encode(r))
}
|
||||
|
||||
// dockerCLI renders the docker command-line fragment (volume driver,
// volumes, devices) for the "dev"/"vol" form values as plain text.
// Invalid selections are a client error (400) rather than a panic.
func (r *remoteV10) dockerCLI(resp http.ResponseWriter, req *http.Request) {
	const tpl = "--volume-driver={{.VolumeDriver}}{{range .Volumes}} --volume={{.}}{{end}}" +
		"{{range .Devices}} --device={{.}}{{end}}"

	// Values are space-separated after form decoding ('+' in the URL).
	devs := strings.Split(req.FormValue("dev"), " ")
	vols := strings.Split(req.FormValue("vol"), " ")

	args, err := dockerCLIArgs(devs, vols)
	if err != nil {
		http.Error(resp, err.Error(), http.StatusBadRequest)
		return
	}
	t := template.Must(template.New("").Parse(tpl))
	assert(t.Execute(resp, args))
}
|
||||
|
||||
// dockerCLIJSON is the JSON counterpart of dockerCLI: same "dev"/"vol"
// form values, structured dockerArgs response.
func (r *remoteV10) dockerCLIJSON(resp http.ResponseWriter, req *http.Request) {
	devs := strings.Split(req.FormValue("dev"), " ")
	vols := strings.Split(req.FormValue("vol"), " ")

	args, err := dockerCLIArgs(devs, vols)
	if err != nil {
		http.Error(resp, err.Error(), http.StatusBadRequest)
		return
	}
	resp.Header().Set("Content-Type", "application/json")
	assert(json.NewEncoder(resp).Encode(args))
}
|
||||
|
||||
// dockerArgs is the payload (template data / JSON body) describing
// everything to add to a docker command line for GPU support.
type dockerArgs struct {
	VolumeDriver string   // volume plugin name
	Volumes      []string // --volume values
	Devices      []string // --device values (control devices + GPUs)
}
|
||||
|
||||
// dockerCLIArgs resolves device ids and volume names into a dockerArgs,
// always prepending the NVIDIA control devices to the device list.
func dockerCLIArgs(devs, vols []string) (*dockerArgs, error) {
	cdevs, err := nvidia.GetControlDevicePaths()
	if err != nil {
		return nil, err
	}
	devs, err = dockerCLIDevices(devs)
	if err != nil {
		return nil, err
	}
	vols, err = dockerCLIVolumes(vols)
	if err != nil {
		return nil, err
	}
	return &dockerArgs{
		VolumeDriver: nvidia.DockerPlugin,
		Volumes:      vols,
		Devices:      append(cdevs, devs...),
	}, nil
}
|
||||
|
||||
// dockerCLIDevices maps device selectors to device node paths.
// ["*"] or [""] selects every device; otherwise the ids are resolved
// through nvidia.FilterDevices (index or UUID prefix).
func dockerCLIDevices(ids []string) ([]string, error) {
	devs := make([]string, 0, len(Devices))

	if len(ids) == 1 && (ids[0] == "*" || ids[0] == "") {
		for i := range Devices {
			devs = append(devs, Devices[i].Path)
		}
	} else {
		d, err := nvidia.FilterDevices(Devices, ids)
		if err != nil {
			return nil, err
		}
		for i := range d {
			devs = append(devs, d[i].Path)
		}
	}
	return devs, nil
}
|
||||
|
||||
func dockerCLIVolumes(names []string) ([]string, error) {
|
||||
vols := make([]string, 0, len(Volumes))
|
||||
|
||||
drv, err := nvidia.GetDriverVersion()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(names) == 1 && (names[0] == "*" || names[0] == "") {
|
||||
for _, v := range Volumes {
|
||||
vols = append(vols, fmt.Sprintf("%s_%s:%s:%s", v.Name, drv, v.Mountpoint, v.MountOptions))
|
||||
}
|
||||
} else {
|
||||
for _, n := range names {
|
||||
v, ok := Volumes[n]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid volume: %s", n)
|
||||
}
|
||||
vols = append(vols, fmt.Sprintf("%s_%s:%s:%s", v.Name, drv, v.Mountpoint, v.MountOptions))
|
||||
}
|
||||
}
|
||||
return vols, nil
|
||||
}
|
||||
|
||||
// mesosCLI emits Mesos agent flags: the GPU inventory as a compressed,
// base64-encoded attribute and the device UUIDs as a custom "gpus"
// resource set.
func (r *remoteV10) mesosCLI(resp http.ResponseWriter, req *http.Request) {
	const format = "--attributes=gpus:%s --resources=gpus:{%s}"

	// Generate Mesos attributes
	var b bytes.Buffer
	writeGPUInfoJSON(&b)
	attr := base64Encode(zlibCompress(b.Bytes()))

	// Generate Mesos custom resources
	uuids := make([]string, 0, len(Devices))
	for i := range Devices {
		uuids = append(uuids, Devices[i].UUID)
	}
	res := strings.Join(uuids, ",")

	_, err := fmt.Fprintf(resp, format, attr, res)
	assert(err)
}
|
||||
|
||||
func zlibCompress(buf []byte) []byte {
|
||||
b := bytes.NewBuffer(make([]byte, 0, len(buf)))
|
||||
w := zlib.NewWriter(b)
|
||||
_, err := w.Write(buf)
|
||||
assert(err)
|
||||
err = w.Close()
|
||||
assert(err)
|
||||
return b.Bytes()
|
||||
}
|
||||
|
||||
// base64Encode returns the URL-safe base64 form of buf with the trailing
// '=' padding stripped (as in RFC 6920 naming), making the result safe
// to embed in command-line values.
func base64Encode(buf []byte) string {
	encoded := base64.URLEncoding.EncodeToString(buf)
	switch len(buf) % 3 {
	case 1:
		return encoded[:len(encoded)-2]
	case 2:
		return encoded[:len(encoded)-1]
	}
	return encoded
}
|
|
@ -1,125 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/docker"
|
||||
)
|
||||
|
||||
// Environment variables consulted by the nvidia-docker wrapper.
const (
	envDockerHost = "DOCKER_HOST" // standard Docker daemon address
	envNVDocker   = "NV_DOCKER"   // override for the docker command invocation
	envNVHost     = "NV_HOST"     // address of a remote nvidia-docker-plugin
	envNVGPU      = "NV_GPU"      // GPU selection (space/comma separated)
)

// ErrInvalidURI is returned when NV_HOST/DOCKER_HOST cannot be parsed
// into a supported remote host URI.
var ErrInvalidURI = errors.New("invalid remote host URI")
|
||||
|
||||
// LoadEnvironment initializes the package-level Host and GPU settings
// and the docker command override from the process environment. It must
// run before any argument generation.
func LoadEnvironment() (err error) {
	Host, err = getHost()
	if err != nil {
		return
	}

	GPU = getGPU()
	cmd := getDocker()
	docker.SetCommand(cmd...)
	return
}
|
||||
|
||||
// parseAddr splits an "[host][:sshport]:[httpport]" address into its
// parts, applying defaults ("localhost", "22", "3476") for anything left
// out. All three results are empty when addr is malformed.
func parseAddr(addr string) (host, sport, hport string) {
	re := regexp.MustCompile(`^(\[[0-9a-f.:]+\]|[0-9A-Za-z.\-_]+)?(:\d+)?:(\d+)?$`)

	host, sport, hport = "localhost", "22", "3476"
	if addr == "" {
		return
	}
	m := re.FindStringSubmatch(addr)
	if m == nil {
		return "", "", ""
	}
	if h := m[1]; h != "" {
		host = h
	}
	if s := m[2]; s != "" {
		sport = s[1:] // drop the leading ':'
	}
	if p := m[3]; p != "" {
		hport = p
	}
	return
}
|
||||
|
||||
// getHost resolves the remote plugin address from NV_HOST (preferred) or
// DOCKER_HOST. It returns (nil, nil) for local operation: neither
// variable set, or a unix-socket Docker host. Supported schemes are
// tcp/http (direct) and ssh (tunneled — see httpClient).
func getHost() (*url.URL, error) {
	var env string

	nvhost := os.Getenv(envNVHost)
	dhost := os.Getenv(envDockerHost)

	if nvhost != "" {
		env = nvhost
	} else if dhost != "" {
		env = dhost
	} else {
		// Neither set: operate on the local machine.
		return nil, nil
	}

	if nvhost != "" && dhost == "" {
		log.Printf("Warning: %s is set but %s is not\n", envNVHost, envDockerHost)
	}

	// Default to tcp:// when no scheme was given.
	if ok, _ := regexp.MatchString("^[a-z0-9+.-]+://", env); !ok {
		env = "tcp://" + env
	}
	uri, err := url.Parse(env)
	if err != nil {
		return nil, ErrInvalidURI
	}
	if uri.Scheme == "unix" {
		// Local Docker socket: not a remote host.
		return nil, nil
	}

	host, sport, hport := parseAddr(uri.Host)
	if host == "" {
		return nil, ErrInvalidURI
	}

	switch uri.Scheme {
	case "tcp":
		uri.Scheme = "http"
		fallthrough
	case "http":
		// An address taken from DOCKER_HOST carries Docker's port, not the
		// plugin's: force the default plugin port in that case.
		if nvhost == "" && dhost != "" {
			hport = "3476"
		}
		uri.Host = fmt.Sprintf("%s:%s", host, hport)
		return uri, nil
	case "ssh":
		// Connect to the SSH port; the plugin address on the remote
		// loopback goes into Opaque for httpClient's tunnel dialer.
		uri.Host = fmt.Sprintf("%s:%s", host, sport)
		uri.Opaque = fmt.Sprintf("localhost:%s", hport)
		if uri.User == nil {
			uri.User = url.UserPassword(os.Getenv("USER"), "")
		}
		return uri, nil
	}

	return nil, ErrInvalidURI
}
|
||||
|
||||
func getGPU() []string {
|
||||
return strings.FieldsFunc(os.Getenv(envNVGPU), func(c rune) bool {
|
||||
return c == ' ' || c == ','
|
||||
})
|
||||
}
|
||||
|
||||
func getDocker() []string {
|
||||
return strings.Fields(os.Getenv(envNVDocker))
|
||||
}
|
|
@ -1,87 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/docker"
|
||||
"github.com/NVIDIA/nvidia-docker/src/nvidia"
|
||||
)
|
||||
|
||||
// GenerateLocalArgs computes the extra docker arguments (devices then
// volumes) needed to run image with GPU support on the local machine,
// after checking the image's CUDA requirement against the local driver.
func GenerateLocalArgs(image string, vols []string) ([]string, error) {
	cv, err := nvidia.GetCUDAVersion()
	if err != nil {
		return nil, err
	}
	if err := cudaSupported(image, cv); err != nil {
		return nil, err
	}

	d, err := devicesArgs()
	if err != nil {
		return nil, err
	}
	v, err := volumesArgs(vols)
	if err != nil {
		return nil, err
	}
	return append(d, v...), nil
}
|
||||
|
||||
func devicesArgs() ([]string, error) {
|
||||
var args []string
|
||||
|
||||
cdevs, err := nvidia.GetControlDevicePaths()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for i := range cdevs {
|
||||
args = append(args, fmt.Sprintf("--device=%s", cdevs[i]))
|
||||
}
|
||||
|
||||
devs, err := nvidia.LookupDevices(nvidia.LookupMinimal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(GPU) == 0 {
|
||||
for i := range devs {
|
||||
args = append(args, fmt.Sprintf("--device=%s", devs[i].Path))
|
||||
}
|
||||
} else {
|
||||
devs, err := nvidia.FilterDevices(devs, GPU)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for i := range devs {
|
||||
args = append(args, fmt.Sprintf("--device=%s", devs[i].Path))
|
||||
}
|
||||
}
|
||||
return args, nil
|
||||
}
|
||||
|
||||
// volumesArgs builds --volume (and, when the volume is not yet present
// locally, --volume-driver) arguments for the requested volume names,
// versioned with the local driver version.
func volumesArgs(vols []string) ([]string, error) {
	args := make([]string, 0, len(vols))

	drv, err := nvidia.GetDriverVersion()
	if err != nil {
		return nil, err
	}
	for _, vol := range nvidia.Volumes {
		for _, v := range vols {
			if v == vol.Name {
				// Check if the volume exists locally otherwise fallback to using the plugin
				n := fmt.Sprintf("%s_%s", vol.Name, drv)
				if _, err := docker.VolumeInspect(n); err == nil {
					args = append(args, fmt.Sprintf("--volume=%s:%s:%s", n, vol.Mountpoint, vol.MountOptions))
				} else {
					args = append(args, fmt.Sprintf("--volume-driver=%s", nvidia.DockerPlugin))
					args = append(args, fmt.Sprintf("--volume=%s:%s:%s", n, vol.Mountpoint, vol.MountOptions))
				}
				break
			}
		}
	}
	return args, nil
}
|
|
@ -1,92 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/docker"
|
||||
"github.com/NVIDIA/nvidia-docker/src/nvidia"
|
||||
)
|
||||
|
||||
// Package-level state shared across the wrapper.
var (
	Version string   // nvidia-docker version string; set elsewhere (not in this file) — presumably at build time, TODO confirm
	Host    *url.URL // remote plugin host (nil when operating locally)
	GPU     []string // GPU selection parsed from NV_GPU
)

func init() {
	// Prefix every log line with the binary name, e.g. "nvidia-docker | ".
	log.SetPrefix(os.Args[0] + " | ")
}
|
||||
|
||||
// assert panics (via log.Panicln) when err is non-nil. The panic is
// recovered by exit, which main defers, to produce a clean error
// message and exit code.
func assert(err error) {
	if err == nil {
		return
	}
	log.Panicln("Error:", err)
}
|
||||
|
||||
// exit is deferred from main: it recovers the panic raised by assert,
// prints runtime errors (and a stack trace when NV_DEBUG is set), then
// terminates with exit code 1 on failure, 0 otherwise.
func exit() {
	if err := recover(); err != nil {
		// assert already logged its message via log.Panicln; only runtime
		// errors (nil dereference, etc.) still need printing here.
		if _, ok := err.(runtime.Error); ok {
			log.Println(err)
		}
		if os.Getenv("NV_DEBUG") != "" {
			log.Printf("%s", debug.Stack())
		}
		os.Exit(1)
	}
	os.Exit(0)
}
|
||||
|
||||
// main wraps the docker CLI: for "create" and "run" it injects the GPU
// devices and driver volumes the image needs (locally or via a remote
// plugin); every other command is handed to docker unchanged.
func main() {
	args := os.Args[1:]
	defer exit()

	assert(LoadEnvironment())

	command, off, err := docker.ParseArgs(args)
	assert(err)

	// "docker container run/create ..." aliases the top-level commands.
	if command == "container" && off+1 < len(args) {
		command = args[off+1]
		off += 1
	}
	if command != "create" && command != "run" {
		if command == "version" {
			fmt.Printf("NVIDIA Docker: %s\n\n", Version)
		}
		// Hand everything else to docker as-is. NOTE(review): the code
		// below still executes if this call returns nil — this appears to
		// rely on docker.Docker not returning on success; confirm.
		assert(docker.Docker(args...))
	}

	// opt is the image name parsed out of the create/run options.
	opt, i, err := docker.ParseArgs(args[off+1:], command)
	assert(err)
	off += i + 1

	if (command == "create" || command == "run") && opt != "" {
		vols, err := VolumesNeeded(opt)
		assert(err)

		if vols != nil {
			var nargs []string
			var err error

			if Host != nil {
				// A remote nvidia-docker-plugin generates the arguments.
				nargs, err = GenerateRemoteArgs(opt, vols)
			} else {
				// Local path: load UVM and initialize NVML first.
				assert(nvidia.LoadUVM())
				assert(nvidia.Init())
				nargs, err = GenerateLocalArgs(opt, vols)
				nvidia.Shutdown()
			}
			assert(err)
			// Splice the generated arguments in right after the command.
			args = append(args[:off], append(nargs, args[off:]...)...)
		}
	}

	assert(docker.Docker(args...))
}
|
|
@ -1,105 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
"golang.org/x/crypto/ssh/agent"
|
||||
"golang.org/x/crypto/ssh/terminal"
|
||||
)
|
||||
|
||||
// timeout bounds every HTTP request made to the remote plugin.
const timeout = 10 * time.Second

// Endpoints on the remote nvidia-docker-plugin. The "plugin" hostname is
// a placeholder: the custom dialer in httpClient ignores the URL host
// and connects to the configured Host instead.
const (
	endpointInfo = "http://plugin/gpu/info/json"
	endpointCLI  = "http://plugin/docker/cli"
)
|
||||
|
||||
// GenerateRemoteArgs asks the remote nvidia-docker-plugin (Host) for the
// docker arguments needed to run image: it first fetches the remote
// CUDA version to validate the image's requirement, then requests the
// CLI fragment for the selected volumes and GPUs.
func GenerateRemoteArgs(image string, vols []string) ([]string, error) {
	var info struct {
		Version struct{ CUDA string }
	}

	c := httpClient(Host)

	r, err := c.Get(endpointInfo)
	if err != nil {
		return nil, err
	}
	defer r.Body.Close()
	if err := json.NewDecoder(r.Body).Decode(&info); err != nil {
		return nil, err
	}
	if err := cudaSupported(image, info.Version.CUDA); err != nil {
		return nil, err
	}

	// '+' separates multiple values: form decoding on the server turns it
	// back into the spaces its handlers split on.
	uri := fmt.Sprintf("%s?vol=%s&dev=%s", endpointCLI,
		strings.Join(vols, "+"),
		strings.Join(GPU, "+"),
	)
	r2, err := c.Get(uri)
	if err != nil {
		return nil, err
	}
	defer r2.Body.Close()

	b, err := ioutil.ReadAll(r2.Body)
	if err != nil {
		return nil, err
	}
	return strings.Split(string(b), " "), nil
}
|
||||
|
||||
// httpClient returns an HTTP client whose dialer ignores the request URL
// and connects to addr instead: directly for tcp/http, or through an
// SSH tunnel to addr.Opaque (the remote plugin port) for ssh.
func httpClient(addr *url.URL) *http.Client {
	// The dialer deliberately discards the network/address arguments.
	dial := func(string, string) (net.Conn, error) {
		if addr.Scheme == "ssh" {
			c, err := ssh.Dial("tcp", addr.Host, &ssh.ClientConfig{
				User: addr.User.Username(),
				Auth: sshAuths(addr),
			})
			if err != nil {
				return nil, err
			}
			return c.Dial("tcp", addr.Opaque)
		}
		return net.Dial("tcp", addr.Host)
	}

	return &http.Client{
		Timeout:   timeout,
		Transport: &http.Transport{Dial: dial},
	}
}
|
||||
|
||||
// sshAuths builds the SSH authentication chain: the local SSH agent when
// available, then an interactive password prompt as the fallback.
func sshAuths(addr *url.URL) (methods []ssh.AuthMethod) {
	if sock := os.Getenv("SSH_AUTH_SOCK"); sock != "" {
		c, err := net.Dial("unix", sock)
		if err != nil {
			log.Println("Warning: failed to contact the local SSH agent")
		} else {
			auth := ssh.PublicKeysCallback(agent.NewClient(c).Signers)
			methods = append(methods, auth)
		}
	}
	// Password prompt runs lazily, only if earlier methods fail.
	auth := ssh.PasswordCallback(func() (string, error) {
		fmt.Printf("%s@%s password: ", addr.User.Username(), addr.Host)
		b, err := terminal.ReadPassword(int(syscall.Stdin))
		fmt.Print("\n")
		return string(b), err
	})
	methods = append(methods, auth)
	return
}
|
|
@ -1,59 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/docker"
|
||||
)
|
||||
|
||||
// Docker image labels consumed by the wrapper.
const (
	labelCUDAVersion   = "com.nvidia.cuda.version"   // CUDA version the image requires ("maj.min")
	labelVolumesNeeded = "com.nvidia.volumes.needed" // space-separated driver volume names
)
|
||||
|
||||
// VolumesNeeded returns the driver volume names the image declares via
// the com.nvidia.volumes.needed label, pulling the image first if it is
// not available locally. A nil slice means no GPU support is requested.
func VolumesNeeded(image string) ([]string, error) {
	ok, err := docker.ImageExists(image)
	if err != nil {
		return nil, err
	}
	if !ok {
		if err = docker.ImagePull(image); err != nil {
			return nil, err
		}
	}

	label, err := docker.Label(image, labelVolumesNeeded)
	if err != nil {
		return nil, err
	}
	if label == "" {
		return nil, nil
	}
	return strings.Split(label, " "), nil
}
|
||||
|
||||
func cudaSupported(image, version string) error {
|
||||
var vmaj, vmin int
|
||||
var lmaj, lmin int
|
||||
|
||||
label, err := docker.Label(image, labelCUDAVersion)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if label == "" {
|
||||
return nil
|
||||
}
|
||||
if _, err := fmt.Sscanf(version, "%d.%d", &vmaj, &vmin); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := fmt.Sscanf(label, "%d.%d", &lmaj, &lmin); err != nil {
|
||||
return err
|
||||
}
|
||||
if lmaj > vmaj || (lmaj == vmaj && lmin > vmin) {
|
||||
return fmt.Errorf("unsupported CUDA version: driver %s < image %s", version, label)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,130 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package nvidia
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/cuda"
|
||||
"github.com/NVIDIA/nvidia-docker/src/nvml"
|
||||
)
|
||||
|
||||
// NVMLDevice and CUDADevice alias the library types so this package can
// define its own methods and conversions on them.
type NVMLDevice nvml.Device
type CUDADevice cuda.Device

// Device merges the NVML view (management: UUID, PCI, topology) and the
// CUDA view (compute properties) of a single GPU.
type Device struct {
	*NVMLDevice
	*CUDADevice
}

type NVMLDeviceStatus nvml.DeviceStatus

// DeviceStatus wraps the dynamic NVML device status.
type DeviceStatus struct {
	*NVMLDeviceStatus
}

// LookupStrategy selects how much device information LookupDevices gathers.
type LookupStrategy uint

const (
	// LookupMinimal uses lightweight NVML handles and skips CUDA
	// properties and P2P topology discovery.
	LookupMinimal LookupStrategy = iota
)
|
||||
|
||||
// Status queries the device's dynamic NVML status.
func (d *Device) Status() (*DeviceStatus, error) {
	s, err := (*nvml.Device)(d.NVMLDevice).Status()
	if err != nil {
		return nil, err
	}
	return &DeviceStatus{(*NVMLDeviceStatus)(s)}, nil
}
|
||||
|
||||
// LookupDevices enumerates every GPU on the machine. With LookupMinimal
// only cheap NVML information is gathered; otherwise full NVML and CUDA
// properties are collected and the pairwise P2P topology is probed.
func LookupDevices(s ...LookupStrategy) (devs []Device, err error) {
	var i uint

	n, err := nvml.GetDeviceCount()
	if err != nil {
		return nil, err
	}
	devs = make([]Device, 0, n)
	if n == 0 {
		return
	}

	// Minimal lookup: NVML-lite handles, empty CUDA side, no topology.
	if len(s) == 1 && s[0] == LookupMinimal {
		for i = 0; i < n; i++ {
			d, err := nvml.NewDeviceLite(i)
			if err != nil {
				return nil, err
			}
			devs = append(devs, Device{(*NVMLDevice)(d), &CUDADevice{}})
		}
		return
	}

	// Full lookup: pair each NVML device with its CUDA device by PCI bus id.
	for i = 0; i < n; i++ {
		nd, err := nvml.NewDevice(i)
		if err != nil {
			return nil, err
		}
		cd, err := cuda.NewDevice(nd.PCI.BusID)
		if err != nil {
			return nil, err
		}
		devs = append(devs, Device{(*NVMLDevice)(nd), (*CUDADevice)(cd)})
	}

	// Probe P2P capability for every device pair and record symmetric
	// topology links on both endpoints.
	for i = 0; i < n-1; i++ {
		for j := i + 1; j < n; j++ {
			ok, err := cuda.CanAccessPeer(
				(*cuda.Device)(devs[i].CUDADevice),
				(*cuda.Device)(devs[j].CUDADevice),
			)
			if err != nil {
				return nil, err
			}
			if ok {
				l, err := nvml.GetP2PLink(
					(*nvml.Device)(devs[i].NVMLDevice),
					(*nvml.Device)(devs[j].NVMLDevice),
				)
				if err != nil {
					return nil, err
				}
				devs[i].Topology = append(devs[i].Topology, nvml.P2PLink{devs[j].PCI.BusID, l})
				devs[j].Topology = append(devs[j].Topology, nvml.P2PLink{devs[i].PCI.BusID, l})
			}
		}
	}
	return
}
|
||||
|
||||
func FilterDevices(devs []Device, ids []string) ([]Device, error) {
|
||||
type void struct{}
|
||||
set := make(map[int]void)
|
||||
|
||||
loop:
|
||||
for _, id := range ids {
|
||||
if strings.HasPrefix(id, "GPU-") {
|
||||
for i := range devs {
|
||||
if strings.HasPrefix(devs[i].UUID, id) {
|
||||
set[i] = void{}
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
} else {
|
||||
i, err := strconv.Atoi(id)
|
||||
if err == nil && i >= 0 && i < len(devs) {
|
||||
set[i] = void{}
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("invalid device: %s", id)
|
||||
}
|
||||
|
||||
d := make([]Device, 0, len(set))
|
||||
for i := range set {
|
||||
d = append(d, devs[i])
|
||||
}
|
||||
return d, nil
|
||||
}
|
|
@ -1,61 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package nvidia
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/cuda"
|
||||
"github.com/NVIDIA/nvidia-docker/src/nvml"
|
||||
)
|
||||
|
||||
const (
	// DockerPlugin is the name under which the volume plugin registers.
	DockerPlugin = "nvidia-docker"

	// NVIDIA control device nodes exposed to containers.
	DeviceCtl      = "/dev/nvidiactl"
	DeviceUVM      = "/dev/nvidia-uvm"
	DeviceUVMTools = "/dev/nvidia-uvm-tools"
)
|
||||
|
||||
func Init() error {
|
||||
if err := os.Setenv("CUDA_DISABLE_UNIFIED_MEMORY", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Setenv("CUDA_CACHE_DISABLE", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Unsetenv("CUDA_VISIBLE_DEVICES"); err != nil {
|
||||
return err
|
||||
}
|
||||
return nvml.Init()
|
||||
}
|
||||
|
||||
// Shutdown releases the NVML resources acquired by Init.
func Shutdown() error {
	return nvml.Shutdown()
}
|
||||
|
||||
// LoadUVM loads the NVIDIA Unified Memory kernel module through
// nvidia-modprobe ("-u": UVM, "-c=0": create /dev node for minor 0).
func LoadUVM() error {
	if err := exec.Command("nvidia-modprobe", "-u", "-c=0").Run(); err != nil {
		return errors.New("Could not load UVM kernel module. Is nvidia-modprobe installed?")
	}
	return nil
}
|
||||
|
||||
// GetDriverVersion returns the installed NVIDIA driver version via NVML.
func GetDriverVersion() (string, error) {
	return nvml.GetDriverVersion()
}

// GetCUDAVersion returns the CUDA driver version.
func GetCUDAVersion() (string, error) {
	return cuda.GetDriverVersion()
}
|
||||
|
||||
func GetControlDevicePaths() ([]string, error) {
|
||||
devs := []string{DeviceCtl, DeviceUVM}
|
||||
|
||||
_, err := os.Stat(DeviceUVMTools)
|
||||
if os.IsNotExist(err) {
|
||||
return devs, nil
|
||||
}
|
||||
return append(devs, DeviceUVMTools), err
|
||||
}
|
|
@ -1,388 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package nvidia
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"debug/elf"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-docker/src/ldcache"
|
||||
)
|
||||
|
||||
const (
	// Volume subdirectories for binaries, 32-bit and 64-bit libraries.
	binDir   = "bin"
	lib32Dir = "lib"
	lib64Dir = "lib64"
)
|
||||
|
||||
// components maps a component category ("binaries", "libraries") to the
// names of the files belonging to it.
type components map[string][]string

// volumeDir is one directory inside a volume and the host files it holds.
type volumeDir struct {
	name  string
	files []string
}

// VolumeInfo describes a volume exported by the plugin.
type VolumeInfo struct {
	Name         string
	Mountpoint   string
	MountOptions string
	Components   components
}

// Volume is a VolumeInfo materialized on disk under Path, with one
// subdirectory per driver version.
type Volume struct {
	*VolumeInfo

	Path    string
	Version string
	dirs    []volumeDir
}

// VolumeMap indexes volumes by name.
type VolumeMap map[string]*Volume
|
||||
|
||||
// FileCloneStrategy abstracts how a host file is duplicated into a volume.
type FileCloneStrategy interface {
	Clone(src, dst string) error
}

// LinkStrategy clones files with hard links only.
type LinkStrategy struct{}

// Clone hard-links src at dst.
func (s LinkStrategy) Clone(src, dst string) error {
	return os.Link(src, dst)
}
|
||||
|
||||
type LinkOrCopyStrategy struct{}
|
||||
|
||||
func (s LinkOrCopyStrategy) Clone(src, dst string) error {
|
||||
// Prefer hard link, fallback to copy
|
||||
err := os.Link(src, dst)
|
||||
if err != nil {
|
||||
err = Copy(src, dst)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func Copy(src, dst string) error {
|
||||
s, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer s.Close()
|
||||
|
||||
fi, err := s.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
d, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := io.Copy(d, s); err != nil {
|
||||
d.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
if err := d.Chmod(fi.Mode()); err != nil {
|
||||
d.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
return d.Close()
|
||||
}
|
||||
|
||||
// Volumes lists the volumes exported by the plugin: a single read-only
// "nvidia_driver" volume holding the driver's user-space binaries and
// libraries, mounted at /usr/local/nvidia inside the container.
var Volumes = []VolumeInfo{
	{
		"nvidia_driver",
		"/usr/local/nvidia",
		"ro",
		components{
			"binaries": {
				//"nvidia-modprobe",       // Kernel module loader
				//"nvidia-settings",       // X server settings
				//"nvidia-xconfig",        // X xorg.conf editor
				"nvidia-cuda-mps-control", // Multi process service CLI
				"nvidia-cuda-mps-server",  // Multi process service server
				"nvidia-debugdump",        // GPU coredump utility
				"nvidia-persistenced",     // Persistence mode utility
				"nvidia-smi",              // System management interface
			},
			"libraries": {
				// ------- X11 -------

				//"libnvidia-cfg.so",  // GPU configuration (used by nvidia-xconfig)
				//"libnvidia-gtk2.so", // GTK2 (used by nvidia-settings)
				//"libnvidia-gtk3.so", // GTK3 (used by nvidia-settings)
				//"libnvidia-wfb.so",  // Wrapped software rendering module for X server
				//"libglx.so",         // GLX extension module for X server

				// ----- Compute -----

				"libnvidia-ml.so",              // Management library
				"libcuda.so",                   // CUDA driver library
				"libnvidia-ptxjitcompiler.so",  // PTX-SASS JIT compiler (used by libcuda)
				"libnvidia-fatbinaryloader.so", // fatbin loader (used by libcuda)
				"libnvidia-opencl.so",          // NVIDIA OpenCL ICD
				"libnvidia-compiler.so",        // NVVM-PTX compiler for OpenCL (used by libnvidia-opencl)
				//"libOpenCL.so",               // OpenCL ICD loader

				// ------ Video ------

				"libvdpau_nvidia.so",  // NVIDIA VDPAU ICD
				"libnvidia-encode.so", // Video encoder
				"libnvcuvid.so",       // Video decoder
				"libnvidia-fbc.so",    // Framebuffer capture
				"libnvidia-ifr.so",    // OpenGL framebuffer capture

				// ----- Graphic -----

				// XXX In an ideal world we would only mount nvidia_* vendor specific libraries and
				// install ICD loaders inside the container. However, for backward compatibility reason
				// we need to mount everything. This will hopefully change once GLVND is well established.

				"libGL.so",         // OpenGL/GLX legacy _or_ compatibility wrapper (GLVND)
				"libGLX.so",        // GLX ICD loader (GLVND)
				"libOpenGL.so",     // OpenGL ICD loader (GLVND)
				"libGLESv1_CM.so",  // OpenGL ES v1 common profile legacy _or_ ICD loader (GLVND)
				"libGLESv2.so",     // OpenGL ES v2 legacy _or_ ICD loader (GLVND)
				"libEGL.so",        // EGL ICD loader
				"libGLdispatch.so", // OpenGL dispatch (GLVND) (used by libOpenGL, libEGL and libGLES*)

				"libGLX_nvidia.so",         // OpenGL/GLX ICD (GLVND)
				"libEGL_nvidia.so",         // EGL ICD (GLVND)
				"libGLESv2_nvidia.so",      // OpenGL ES v2 ICD (GLVND)
				"libGLESv1_CM_nvidia.so",   // OpenGL ES v1 common profile ICD (GLVND)
				"libnvidia-eglcore.so",     // EGL core (used by libGLES* or libGLES*_nvidia and libEGL_nvidia)
				"libnvidia-egl-wayland.so", // EGL wayland extensions (used by libEGL_nvidia)
				"libnvidia-glcore.so",      // OpenGL core (used by libGL or libGLX_nvidia)
				"libnvidia-tls.so",         // Thread local storage (used by libGL or libGLX_nvidia)
				"libnvidia-glsi.so",        // OpenGL system interaction (used by libEGL_nvidia)
			},
		},
	},
}
|
||||
|
||||
// blacklisted reports whether the library at file should be excluded from the
// volume: non-NVIDIA EGL/OpenGL implementations (detected by the absence of a
// dependency on the NVIDIA GL core or GLVND dispatch) and nvidia-tls builds
// using the old ABI.
func blacklisted(file string, obj *elf.File) (bool, error) {
	lib := regexp.MustCompile(`^.*/lib([\w-]+)\.so[\d.]*$`)
	glcore := regexp.MustCompile(`libnvidia-e?glcore\.so`)
	gldispatch := regexp.MustCompile(`libGLdispatch\.so`)

	if m := lib.FindStringSubmatch(file); m != nil {
		switch m[1] {

		// Blacklist EGL/OpenGL libraries issued by other vendors
		case "EGL":
			fallthrough
		case "GLESv1_CM":
			fallthrough
		case "GLESv2":
			fallthrough
		case "GL":
			deps, err := obj.DynString(elf.DT_NEEDED)
			if err != nil {
				return false, err
			}
			for _, d := range deps {
				if glcore.MatchString(d) || gldispatch.MatchString(d) {
					return false, nil
				}
			}
			return true, nil

		// Blacklist TLS libraries using the old ABI (!= 2.3.99)
		case "nvidia-tls":
			const abi = 0x6300000003
			// Section returns nil when the section is absent; guard against a
			// nil pointer dereference and blacklist such files instead of
			// crashing, since the new ABI cannot be proven.
			sec := obj.Section(".note.ABI-tag")
			if sec == nil {
				return true, nil
			}
			s, err := sec.Data()
			if err != nil {
				return false, err
			}
			// The ABI value sits at offset 24; reject truncated notes.
			if len(s) < 32 {
				return false, fmt.Errorf("%s: malformed .note.ABI-tag section", file)
			}
			return binary.LittleEndian.Uint64(s[24:]) != abi, nil
		}
	}
	return false, nil
}
|
||||
|
||||
func (v *Volume) Create(s FileCloneStrategy) (err error) {
|
||||
root := path.Join(v.Path, v.Version)
|
||||
if err = os.MkdirAll(root, 0755); err != nil {
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
v.Remove()
|
||||
}
|
||||
}()
|
||||
|
||||
for _, d := range v.dirs {
|
||||
vpath := path.Join(root, d.name)
|
||||
if err := os.MkdirAll(vpath, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// For each file matching the volume components (blacklist excluded), create a hardlink/copy
|
||||
// of it inside the volume directory. We also need to create soname symlinks similar to what
|
||||
// ldconfig does since our volume will only show up at runtime.
|
||||
for _, f := range d.files {
|
||||
obj, err := elf.Open(f)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v", f, err)
|
||||
}
|
||||
defer obj.Close()
|
||||
|
||||
ok, err := blacklisted(f, obj)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v", f, err)
|
||||
}
|
||||
if ok {
|
||||
continue
|
||||
}
|
||||
|
||||
l := path.Join(vpath, path.Base(f))
|
||||
if err := s.Clone(f, l); err != nil {
|
||||
return err
|
||||
}
|
||||
soname, err := obj.DynString(elf.DT_SONAME)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %v", f, err)
|
||||
}
|
||||
if len(soname) > 0 {
|
||||
l = path.Join(vpath, soname[0])
|
||||
if err := os.Symlink(path.Base(f), l); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
// XXX Many applications (wrongly) assume that libcuda.so exists (e.g. with dlopen)
|
||||
// Hardcode the libcuda symlink for the time being.
|
||||
if strings.HasPrefix(soname[0], "libcuda") {
|
||||
l = strings.TrimRight(l, ".0123456789")
|
||||
if err := os.Symlink(path.Base(f), l); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// XXX GLVND requires this symlink for indirect GLX support
|
||||
// It won't be needed once we have an indirect GLX vendor neutral library.
|
||||
if strings.HasPrefix(soname[0], "libGLX_nvidia") {
|
||||
l = strings.Replace(l, "GLX_nvidia", "GLX_indirect", 1)
|
||||
if err := os.Symlink(path.Base(f), l); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *Volume) Remove(version ...string) error {
|
||||
vv := v.Version
|
||||
if len(version) == 1 {
|
||||
vv = version[0]
|
||||
}
|
||||
return os.RemoveAll(path.Join(v.Path, vv))
|
||||
}
|
||||
|
||||
func (v *Volume) Exists(version ...string) (bool, error) {
|
||||
vv := v.Version
|
||||
if len(version) == 1 {
|
||||
vv = version[0]
|
||||
}
|
||||
_, err := os.Stat(path.Join(v.Path, vv))
|
||||
if os.IsNotExist(err) {
|
||||
return false, nil
|
||||
}
|
||||
return true, err
|
||||
}
|
||||
|
||||
func (v *Volume) ListVersions() ([]string, error) {
|
||||
dirs, err := ioutil.ReadDir(v.Path)
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
versions := make([]string, len(dirs))
|
||||
for i := range dirs {
|
||||
versions[i] = dirs[i].Name()
|
||||
}
|
||||
return versions, nil
|
||||
}
|
||||
|
||||
// which resolves each named binary through the system `which` command and
// returns the symlink-resolved absolute paths of those that were found.
// Missing binaries are silently skipped.
func which(bins ...string) ([]string, error) {
	paths := make([]string, 0, len(bins))

	// `which` exits non-zero when any binary is missing; only the lines it
	// did print matter, so its exit status is deliberately ignored.
	out, _ := exec.Command("which", bins...).Output()
	r := bufio.NewReader(bytes.NewBuffer(out))
	for {
		p, err := r.ReadString('\n')
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		if p = strings.TrimSpace(p); !path.IsAbs(p) {
			continue
		}
		// Named `resolved` so the local does not shadow the "path" package.
		resolved, err := filepath.EvalSymlinks(p)
		if err != nil {
			return nil, err
		}
		paths = append(paths, resolved)
	}
	return paths, nil
}
|
||||
|
||||
// LookupVolumes builds the VolumeMap for all known Volumes rooted at prefix,
// resolving binaries through `which` and libraries through the ldcache,
// versioned by the currently installed driver.
func LookupVolumes(prefix string) (vols VolumeMap, err error) {
	drv, err := GetDriverVersion()
	if err != nil {
		return nil, err
	}
	cache, err := ldcache.Open()
	if err != nil {
		return nil, err
	}
	defer func() {
		// Report the cache close failure only when no earlier error occurred.
		if e := cache.Close(); err == nil {
			err = e
		}
	}()

	vols = make(VolumeMap, len(Volumes))

	for i := range Volumes {
		vol := &Volume{
			VolumeInfo: &Volumes[i],
			Path:       path.Join(prefix, Volumes[i].Name),
			Version:    drv,
		}

		for t, c := range vol.Components {
			switch t {
			case "binaries":
				bins, err := which(c...)
				if err != nil {
					return nil, err
				}
				vol.dirs = append(vol.dirs, volumeDir{binDir, bins})
			case "libraries":
				// The ldcache lookup splits matches into 32-bit and 64-bit sets.
				libs32, libs64 := cache.Lookup(c...)
				vol.dirs = append(vol.dirs,
					volumeDir{lib32Dir, libs32},
					volumeDir{lib64Dir, libs64},
				)
			}
		}
		vols[vol.Name] = vol
	}
	return
}
|
|
@ -1,311 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package nvml
|
||||
|
||||
// #cgo LDFLAGS: -ldl -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
// #include "nvml_dl.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const (
	// Buffer sizes mandated by the NVML API for the string queries below.
	szDriver = C.NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE
	szName   = C.NVML_DEVICE_NAME_BUFFER_SIZE
	szUUID   = C.NVML_DEVICE_UUID_BUFFER_SIZE
	// Local limits for the compute-process queries.
	szProcs    = 32
	szProcName = 64
)
|
||||
|
||||
// handle wraps an NVML device handle.
type handle struct{ dev C.nvmlDevice_t }

// uintPtr copies a C uint into a freshly allocated Go *uint.
func uintPtr(c C.uint) *uint {
	i := uint(c)
	return &i
}

// uint64Ptr copies a C unsigned long long into a freshly allocated Go *uint64.
func uint64Ptr(c C.ulonglong) *uint64 {
	i := uint64(c)
	return &i
}

// stringPtr copies a C string into a freshly allocated Go *string.
func stringPtr(c *C.char) *string {
	s := C.GoString(c)
	return &s
}

// errorString converts an NVML return code into a Go error (nil on success).
func errorString(ret C.nvmlReturn_t) error {
	if ret == C.NVML_SUCCESS {
		return nil
	}
	err := C.GoString(C.nvmlErrorString(ret))
	return fmt.Errorf("nvml: %v", err)
}
|
||||
|
||||
// init_ loads the NVML shared library through the dl wrapper and initializes it.
func init_() error {
	r := C.nvmlInit_dl()
	if r == C.NVML_ERROR_LIBRARY_NOT_FOUND {
		return errors.New("could not load NVML library")
	}
	return errorString(r)
}

// shutdown tears NVML down and unloads the shared library.
func shutdown() error {
	return errorString(C.nvmlShutdown_dl())
}
|
||||
|
||||
// systemGetDriverVersion returns the installed NVIDIA driver version.
func systemGetDriverVersion() (string, error) {
	var driver [szDriver]C.char

	r := C.nvmlSystemGetDriverVersion(&driver[0], szDriver)
	return C.GoString(&driver[0]), errorString(r)
}

// systemGetProcessName returns the name of the process with the given PID.
func systemGetProcessName(pid uint) (string, error) {
	var proc [szProcName]C.char

	r := C.nvmlSystemGetProcessName(C.uint(pid), &proc[0], szProcName)
	return C.GoString(&proc[0]), errorString(r)
}

// deviceGetCount returns the number of GPUs visible to NVML.
func deviceGetCount() (uint, error) {
	var n C.uint

	r := C.nvmlDeviceGetCount(&n)
	return uint(n), errorString(r)
}

// deviceGetHandleByIndex returns the handle of the idx-th GPU.
func deviceGetHandleByIndex(idx uint) (handle, error) {
	var dev C.nvmlDevice_t

	r := C.nvmlDeviceGetHandleByIndex(C.uint(idx), &dev)
	return handle{dev}, errorString(r)
}

// deviceGetTopologyCommonAncestor returns the topology level shared by two
// devices; nil with no error when the driver does not support the query.
func deviceGetTopologyCommonAncestor(h1, h2 handle) (*uint, error) {
	var level C.nvmlGpuTopologyLevel_t

	r := C.nvmlDeviceGetTopologyCommonAncestor_dl(h1.dev, h2.dev, &level)
	if r == C.NVML_ERROR_FUNCTION_NOT_FOUND || r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(C.uint(level)), errorString(r)
}
|
||||
|
||||
// deviceGetName returns the product name; nil when unsupported.
func (h handle) deviceGetName() (*string, error) {
	var name [szName]C.char

	r := C.nvmlDeviceGetName(h.dev, &name[0], szName)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return stringPtr(&name[0]), errorString(r)
}

// deviceGetUUID returns the device UUID; nil when unsupported.
func (h handle) deviceGetUUID() (*string, error) {
	var uuid [szUUID]C.char

	r := C.nvmlDeviceGetUUID(h.dev, &uuid[0], szUUID)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return stringPtr(&uuid[0]), errorString(r)
}

// deviceGetPciInfo returns the PCI bus ID; nil when unsupported.
func (h handle) deviceGetPciInfo() (*string, error) {
	var pci C.nvmlPciInfo_t

	r := C.nvmlDeviceGetPciInfo(h.dev, &pci)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return stringPtr(&pci.busId[0]), errorString(r)
}

// deviceGetMinorNumber returns the /dev/nvidia* minor number; nil when unsupported.
func (h handle) deviceGetMinorNumber() (*uint, error) {
	var minor C.uint

	r := C.nvmlDeviceGetMinorNumber(h.dev, &minor)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(minor), errorString(r)
}
|
||||
|
||||
// deviceGetBAR1MemoryInfo returns total and used BAR1 memory; nils when unsupported.
func (h handle) deviceGetBAR1MemoryInfo() (*uint64, *uint64, error) {
	var bar1 C.nvmlBAR1Memory_t

	r := C.nvmlDeviceGetBAR1MemoryInfo(h.dev, &bar1)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil, nil
	}
	return uint64Ptr(bar1.bar1Total), uint64Ptr(bar1.bar1Used), errorString(r)
}

// deviceGetPowerManagementLimit returns the power limit; nil when unsupported.
func (h handle) deviceGetPowerManagementLimit() (*uint, error) {
	var power C.uint

	r := C.nvmlDeviceGetPowerManagementLimit(h.dev, &power)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(power), errorString(r)
}

// deviceGetMaxClockInfo returns the max SM and memory clocks; nils when
// unsupported. The memory clock is queried only if the SM query succeeded.
func (h handle) deviceGetMaxClockInfo() (*uint, *uint, error) {
	var sm, mem C.uint

	r := C.nvmlDeviceGetMaxClockInfo(h.dev, C.NVML_CLOCK_SM, &sm)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil, nil
	}
	if r == C.NVML_SUCCESS {
		r = C.nvmlDeviceGetMaxClockInfo(h.dev, C.NVML_CLOCK_MEM, &mem)
	}
	return uintPtr(sm), uintPtr(mem), errorString(r)
}
|
||||
|
||||
// deviceGetMaxPcieLinkGeneration returns the max PCIe generation; nil when unsupported.
func (h handle) deviceGetMaxPcieLinkGeneration() (*uint, error) {
	var link C.uint

	r := C.nvmlDeviceGetMaxPcieLinkGeneration(h.dev, &link)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(link), errorString(r)
}

// deviceGetMaxPcieLinkWidth returns the max PCIe width in lanes; nil when unsupported.
func (h handle) deviceGetMaxPcieLinkWidth() (*uint, error) {
	var width C.uint

	r := C.nvmlDeviceGetMaxPcieLinkWidth(h.dev, &width)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(width), errorString(r)
}

// deviceGetPowerUsage returns the current power draw; nil when unsupported.
func (h handle) deviceGetPowerUsage() (*uint, error) {
	var power C.uint

	r := C.nvmlDeviceGetPowerUsage(h.dev, &power)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(power), errorString(r)
}

// deviceGetTemperature returns the GPU temperature; nil when unsupported.
func (h handle) deviceGetTemperature() (*uint, error) {
	var temp C.uint

	r := C.nvmlDeviceGetTemperature(h.dev, C.NVML_TEMPERATURE_GPU, &temp)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(temp), errorString(r)
}

// deviceGetUtilizationRates returns GPU and memory utilization; nils when unsupported.
func (h handle) deviceGetUtilizationRates() (*uint, *uint, error) {
	var usage C.nvmlUtilization_t

	r := C.nvmlDeviceGetUtilizationRates(h.dev, &usage)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil, nil
	}
	return uintPtr(usage.gpu), uintPtr(usage.memory), errorString(r)
}

// deviceGetEncoderUtilization returns encoder utilization (the sampling
// period is discarded); nil when unsupported.
func (h handle) deviceGetEncoderUtilization() (*uint, error) {
	var usage, sampling C.uint

	r := C.nvmlDeviceGetEncoderUtilization(h.dev, &usage, &sampling)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(usage), errorString(r)
}

// deviceGetDecoderUtilization returns decoder utilization (the sampling
// period is discarded); nil when unsupported.
func (h handle) deviceGetDecoderUtilization() (*uint, error) {
	var usage, sampling C.uint

	r := C.nvmlDeviceGetDecoderUtilization(h.dev, &usage, &sampling)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uintPtr(usage), errorString(r)
}
|
||||
|
||||
// deviceGetMemoryInfo returns used device memory; nil when unsupported.
func (h handle) deviceGetMemoryInfo() (*uint64, error) {
	var mem C.nvmlMemory_t

	r := C.nvmlDeviceGetMemoryInfo(h.dev, &mem)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	return uint64Ptr(mem.used), errorString(r)
}

// deviceGetClockInfo returns current SM and memory clocks; nils when
// unsupported. The memory clock is queried only if the SM query succeeded.
func (h handle) deviceGetClockInfo() (*uint, *uint, error) {
	var sm, mem C.uint

	r := C.nvmlDeviceGetClockInfo(h.dev, C.NVML_CLOCK_SM, &sm)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil, nil
	}
	if r == C.NVML_SUCCESS {
		r = C.nvmlDeviceGetClockInfo(h.dev, C.NVML_CLOCK_MEM, &mem)
	}
	return uintPtr(sm), uintPtr(mem), errorString(r)
}

// deviceGetMemoryErrorCounter returns volatile uncorrected ECC counters for
// the L1 cache, L2 cache and device memory; nils when unsupported. Each
// follow-up query runs only if the previous one succeeded.
func (h handle) deviceGetMemoryErrorCounter() (*uint64, *uint64, *uint64, error) {
	var l1, l2, mem C.ulonglong

	r := C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
		C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_L1_CACHE, &l1)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil, nil, nil
	}
	if r == C.NVML_SUCCESS {
		r = C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
			C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_L2_CACHE, &l2)
	}
	if r == C.NVML_SUCCESS {
		r = C.nvmlDeviceGetMemoryErrorCounter(h.dev, C.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
			C.NVML_VOLATILE_ECC, C.NVML_MEMORY_LOCATION_DEVICE_MEMORY, &mem)
	}
	return uint64Ptr(l1), uint64Ptr(l2), uint64Ptr(mem), errorString(r)
}

// deviceGetPcieThroughput returns PCIe RX/TX throughput; nils when unsupported.
func (h handle) deviceGetPcieThroughput() (*uint, *uint, error) {
	var rx, tx C.uint

	r := C.nvmlDeviceGetPcieThroughput(h.dev, C.NVML_PCIE_UTIL_RX_BYTES, &rx)
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil, nil
	}
	if r == C.NVML_SUCCESS {
		r = C.nvmlDeviceGetPcieThroughput(h.dev, C.NVML_PCIE_UTIL_TX_BYTES, &tx)
	}
	return uintPtr(rx), uintPtr(tx), errorString(r)
}

// deviceGetComputeRunningProcesses returns the PIDs and GPU memory usage of
// compute processes running on the device (up to szProcs entries); nils when
// unsupported.
func (h handle) deviceGetComputeRunningProcesses() ([]uint, []uint64, error) {
	var procs [szProcs]C.nvmlProcessInfo_t
	var count = C.uint(szProcs)

	r := C.nvmlDeviceGetComputeRunningProcesses(h.dev, &count, &procs[0])
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil, nil
	}
	n := int(count)
	pids := make([]uint, n)
	mems := make([]uint64, n)
	for i := 0; i < n; i++ {
		pids[i] = uint(procs[i].pid)
		mems[i] = uint64(procs[i].usedGpuMemory)
	}
	return pids, mems, errorString(r)
}
|
381
src/nvml/nvml.go
381
src/nvml/nvml.go
|
@ -1,381 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
package nvml
|
||||
|
||||
// #include "nvml_dl.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
	// Errors returned by the affinity and topology helpers below.
	ErrCPUAffinity        = errors.New("failed to retrieve CPU affinity")
	ErrUnsupportedP2PLink = errors.New("unsupported P2P link type")
	ErrUnsupportedGPU     = errors.New("unsupported GPU device")
)

// P2PLinkType classifies the interconnect between two GPUs.
type P2PLinkType uint

const (
	P2PLinkUnknown P2PLinkType = iota
	P2PLinkCrossCPU
	P2PLinkSameCPU
	P2PLinkHostBridge
	P2PLinkMultiSwitch
	P2PLinkSingleSwitch
	P2PLinkSameBoard
)

// P2PLink describes the link between a device and a peer identified by its
// PCI bus ID.
type P2PLink struct {
	BusID string
	Link  P2PLinkType
}
|
||||
|
||||
func (t P2PLinkType) String() string {
|
||||
switch t {
|
||||
case P2PLinkCrossCPU:
|
||||
return "Cross CPU socket"
|
||||
case P2PLinkSameCPU:
|
||||
return "Same CPU socket"
|
||||
case P2PLinkHostBridge:
|
||||
return "Host PCI bridge"
|
||||
case P2PLinkMultiSwitch:
|
||||
return "Multiple PCI switches"
|
||||
case P2PLinkSingleSwitch:
|
||||
return "Single PCI switch"
|
||||
case P2PLinkSameBoard:
|
||||
return "Same board"
|
||||
case P2PLinkUnknown:
|
||||
}
|
||||
return "N/A"
|
||||
}
|
||||
|
||||
// ClockInfo holds SM and memory clock values; fields are nil when the
// corresponding query is unsupported.
type ClockInfo struct {
	Cores  *uint
	Memory *uint
}

// PCIInfo holds static PCI properties of a device.
type PCIInfo struct {
	BusID     string
	BAR1      *uint64
	Bandwidth *uint
}

// Device describes the static properties of a GPU. Pointer fields are nil
// when the underlying NVML query is unsupported on the device.
type Device struct {
	handle

	UUID        string
	Path        string
	Model       *string
	Power       *uint
	CPUAffinity *uint
	PCI         PCIInfo
	Clocks      ClockInfo
	Topology    []P2PLink
}

// UtilizationInfo holds GPU/memory/encoder/decoder utilization values.
type UtilizationInfo struct {
	GPU     *uint
	Memory  *uint
	Encoder *uint
	Decoder *uint
}

// PCIThroughputInfo holds PCIe receive/transmit throughput.
type PCIThroughputInfo struct {
	RX *uint
	TX *uint
}

// PCIStatusInfo holds dynamic PCI state.
type PCIStatusInfo struct {
	BAR1Used   *uint64
	Throughput PCIThroughputInfo
}

// ECCErrorsInfo holds uncorrected ECC error counters.
type ECCErrorsInfo struct {
	L1Cache *uint64
	L2Cache *uint64
	Global  *uint64
}

// MemoryInfo holds memory usage and ECC error counters.
type MemoryInfo struct {
	GlobalUsed *uint64
	ECCErrors  ECCErrorsInfo
}

// ProcessInfo describes a compute process running on a device.
type ProcessInfo struct {
	PID        uint
	Name       string
	MemoryUsed uint64
}

// DeviceStatus is a snapshot of the dynamic state of a GPU, as returned by
// Device.Status. Pointer fields are nil when a query is unsupported.
type DeviceStatus struct {
	Power       *uint
	Temperature *uint
	Utilization UtilizationInfo
	Memory      MemoryInfo
	Clocks      ClockInfo
	PCI         PCIStatusInfo
	Processes   []ProcessInfo
}
|
||||
|
||||
// assert panics on a non-nil error; the panic is recovered by the deferred
// handlers in NewDevice, NewDeviceLite and Status to flatten error handling.
func assert(err error) {
	if err != nil {
		panic(err)
	}
}
|
||||
|
||||
// Init initializes the NVML library.
func Init() error {
	return init_()
}

// Shutdown releases the NVML resources acquired by Init.
func Shutdown() error {
	return shutdown()
}

// GetDeviceCount returns the number of GPUs visible to NVML.
func GetDeviceCount() (uint, error) {
	return deviceGetCount()
}

// GetDriverVersion returns the installed NVIDIA driver version.
func GetDriverVersion() (string, error) {
	return systemGetDriverVersion()
}
|
||||
|
||||
func numaNode(busid string) (uint, error) {
|
||||
b, err := ioutil.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", strings.ToLower(busid)))
|
||||
if err != nil {
|
||||
// XXX report node 0 if NUMA support isn't enabled
|
||||
return 0, nil
|
||||
}
|
||||
node, err := strconv.ParseInt(string(bytes.TrimSpace(b)), 10, 8)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("%v: %v", ErrCPUAffinity, err)
|
||||
}
|
||||
if node < 0 {
|
||||
node = 0 // XXX report node 0 instead of NUMA_NO_NODE
|
||||
}
|
||||
return uint(node), nil
|
||||
}
|
||||
|
||||
// pciBandwidth computes PCIe bandwidth in MB/s from the link generation and
// width, or nil when either value is unknown. Unrecognized generations map
// to a per-lane rate of 0.
func pciBandwidth(gen, width *uint) *uint {
	if gen == nil || width == nil {
		return nil
	}
	// Per-lane throughput in MB/s for PCIe generations 1-4.
	perLane := map[uint]uint{
		1: 250,
		2: 500,
		3: 985,
		4: 1969,
	}
	bw := perLane[*gen] * *width
	return &bw
}
|
||||
|
||||
// NewDevice queries the full set of static properties of the idx-th GPU.
// NVML failures raised through assert are recovered and returned as err.
func NewDevice(idx uint) (device *Device, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = r.(error)
		}
	}()

	h, err := deviceGetHandleByIndex(idx)
	assert(err)
	model, err := h.deviceGetName()
	assert(err)
	uuid, err := h.deviceGetUUID()
	assert(err)
	minor, err := h.deviceGetMinorNumber()
	assert(err)
	power, err := h.deviceGetPowerManagementLimit()
	assert(err)
	busid, err := h.deviceGetPciInfo()
	assert(err)
	bar1, _, err := h.deviceGetBAR1MemoryInfo()
	assert(err)
	pcig, err := h.deviceGetMaxPcieLinkGeneration()
	assert(err)
	pciw, err := h.deviceGetMaxPcieLinkWidth()
	assert(err)
	ccore, cmem, err := h.deviceGetMaxClockInfo()
	assert(err)

	// Minor number, bus ID and UUID are mandatory: without them the device
	// node path and identity cannot be established.
	if minor == nil || busid == nil || uuid == nil {
		return nil, ErrUnsupportedGPU
	}
	path := fmt.Sprintf("/dev/nvidia%d", *minor)
	node, err := numaNode(*busid)
	assert(err)

	device = &Device{
		handle:      h,
		UUID:        *uuid,
		Path:        path,
		Model:       model,
		Power:       power,
		CPUAffinity: &node,
		PCI: PCIInfo{
			BusID:     *busid,
			BAR1:      bar1,
			Bandwidth: pciBandwidth(pcig, pciw), // MB/s
		},
		Clocks: ClockInfo{
			Cores:  ccore, // MHz
			Memory: cmem,  // MHz
		},
	}
	// Normalize the units reported by NVML.
	if power != nil {
		*device.Power /= 1000 // W
	}
	if bar1 != nil {
		*device.PCI.BAR1 /= 1024 * 1024 // MiB
	}
	return
}
|
||||
|
||||
// NewDeviceLite queries only the identity of the idx-th GPU (UUID, device
// node path, PCI bus ID), skipping the more expensive property queries.
// NVML failures raised through assert are recovered and returned as err.
func NewDeviceLite(idx uint) (device *Device, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = r.(error)
		}
	}()

	h, err := deviceGetHandleByIndex(idx)
	assert(err)
	uuid, err := h.deviceGetUUID()
	assert(err)
	minor, err := h.deviceGetMinorNumber()
	assert(err)
	busid, err := h.deviceGetPciInfo()
	assert(err)

	// Minor number, bus ID and UUID are mandatory for identifying the device.
	if minor == nil || busid == nil || uuid == nil {
		return nil, ErrUnsupportedGPU
	}
	path := fmt.Sprintf("/dev/nvidia%d", *minor)

	device = &Device{
		handle: h,
		UUID:   *uuid,
		Path:   path,
		PCI: PCIInfo{
			BusID: *busid,
		},
	}
	return
}
|
||||
|
||||
// Status queries a snapshot of the dynamic state of the device: power draw,
// temperature, utilization, memory usage, ECC errors, clocks, PCI throughput
// and running compute processes. NVML failures raised through assert are
// recovered and returned as err; nil fields indicate unsupported queries.
func (d *Device) Status() (status *DeviceStatus, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = r.(error)
		}
	}()

	power, err := d.deviceGetPowerUsage()
	assert(err)
	temp, err := d.deviceGetTemperature()
	assert(err)
	ugpu, umem, err := d.deviceGetUtilizationRates()
	assert(err)
	uenc, err := d.deviceGetEncoderUtilization()
	assert(err)
	udec, err := d.deviceGetDecoderUtilization()
	assert(err)
	mem, err := d.deviceGetMemoryInfo()
	assert(err)
	ccore, cmem, err := d.deviceGetClockInfo()
	assert(err)
	// Only the used BAR1 amount (second return value) is reported here.
	_, bar1, err := d.deviceGetBAR1MemoryInfo()
	assert(err)
	pids, pmems, err := d.deviceGetComputeRunningProcesses()
	assert(err)
	el1, el2, emem, err := d.deviceGetMemoryErrorCounter()
	assert(err)
	pcirx, pcitx, err := d.deviceGetPcieThroughput()
	assert(err)

	status = &DeviceStatus{
		Power:       power,
		Temperature: temp, // °C
		Utilization: UtilizationInfo{
			GPU:     ugpu, // %
			Memory:  umem, // %
			Encoder: uenc, // %
			Decoder: udec, // %
		},
		Memory: MemoryInfo{
			GlobalUsed: mem,
			ECCErrors: ECCErrorsInfo{
				L1Cache: el1,
				L2Cache: el2,
				Global:  emem,
			},
		},
		Clocks: ClockInfo{
			Cores:  ccore, // MHz
			Memory: cmem,  // MHz
		},
		PCI: PCIStatusInfo{
			BAR1Used: bar1,
			Throughput: PCIThroughputInfo{
				RX: pcirx,
				TX: pcitx,
			},
		},
	}
	// Normalize the units reported by NVML.
	if power != nil {
		*status.Power /= 1000 // W
	}
	if mem != nil {
		*status.Memory.GlobalUsed /= 1024 * 1024 // MiB
	}
	if bar1 != nil {
		*status.PCI.BAR1Used /= 1024 * 1024 // MiB
	}
	if pcirx != nil {
		*status.PCI.Throughput.RX /= 1000 // MB/s
	}
	if pcitx != nil {
		*status.PCI.Throughput.TX /= 1000 // MB/s
	}
	// Resolve the name of each running compute process.
	for i := range pids {
		name, err := systemGetProcessName(pids[i])
		assert(err)
		status.Processes = append(status.Processes, ProcessInfo{
			PID:        pids[i],
			Name:       name,
			MemoryUsed: pmems[i] / (1024 * 1024), // MiB
		})
	}
	return
}
|
||||
|
||||
// GetP2PLink returns the P2P link type between two devices, derived from
// their common topology ancestor; P2PLinkUnknown when the driver does not
// support the topology query.
func GetP2PLink(dev1, dev2 *Device) (link P2PLinkType, err error) {
	level, err := deviceGetTopologyCommonAncestor(dev1.handle, dev2.handle)
	if err != nil || level == nil {
		return P2PLinkUnknown, err
	}

	switch *level {
	case C.NVML_TOPOLOGY_INTERNAL:
		link = P2PLinkSameBoard
	case C.NVML_TOPOLOGY_SINGLE:
		link = P2PLinkSingleSwitch
	case C.NVML_TOPOLOGY_MULTIPLE:
		link = P2PLinkMultiSwitch
	case C.NVML_TOPOLOGY_HOSTBRIDGE:
		link = P2PLinkHostBridge
	case C.NVML_TOPOLOGY_CPU:
		link = P2PLinkSameCPU
	case C.NVML_TOPOLOGY_SYSTEM:
		link = P2PLinkCrossCPU
	default:
		err = ErrUnsupportedP2PLink
	}
	return
}
|
|
@ -1,46 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.

#include <stddef.h>
#include <dlfcn.h>

#include "nvml_dl.h"

/*
 * Resolve symbol `sym` from the dlopen'd NVML library into `x`, returning
 * NVML_ERROR_FUNCTION_NOT_FOUND from the calling function when it is missing.
 * dlerror() is cleared first so a stale error cannot be misread.
 */
#define DLSYM(x, sym)                           \
do {                                            \
	dlerror();                                  \
	x = dlsym(handle, #sym);                    \
	if (dlerror() != NULL) {                    \
		return (NVML_ERROR_FUNCTION_NOT_FOUND); \
	}                                           \
} while (0)

/* Generic NVML entry-point pointer type. */
typedef nvmlReturn_t (*nvmlSym_t)();

/* Handle of the dlopen'd libnvidia-ml.so.1, set by nvmlInit_dl. */
static void *handle;
|
||||
|
||||
nvmlReturn_t NVML_DL(nvmlInit)(void)
|
||||
{
|
||||
handle = dlopen("libnvidia-ml.so.1", RTLD_LAZY | RTLD_GLOBAL);
|
||||
if (handle == NULL) {
|
||||
return (NVML_ERROR_LIBRARY_NOT_FOUND);
|
||||
}
|
||||
return (nvmlInit());
|
||||
}
|
||||
|
||||
nvmlReturn_t NVML_DL(nvmlShutdown)(void)
|
||||
{
|
||||
nvmlReturn_t r = nvmlShutdown();
|
||||
if (r != NVML_SUCCESS) {
|
||||
return (r);
|
||||
}
|
||||
return (dlclose(handle) ? NVML_ERROR_UNKNOWN : NVML_SUCCESS);
|
||||
}
|
||||
|
||||
/*
 * Dynamically-resolved wrapper for nvmlDeviceGetTopologyCommonAncestor,
 * which is absent from older NVML versions; DLSYM returns
 * NVML_ERROR_FUNCTION_NOT_FOUND in that case.
 */
nvmlReturn_t NVML_DL(nvmlDeviceGetTopologyCommonAncestor)(
    nvmlDevice_t dev1, nvmlDevice_t dev2, nvmlGpuTopologyLevel_t *info)
{
	nvmlSym_t sym;

	DLSYM(sym, nvmlDeviceGetTopologyCommonAncestor);
	return ((*sym)(dev1, dev2, info));
}
|
|
@ -1,15 +0,0 @@
|
|||
// Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.

#ifndef _NVML_DL_H_
#define _NVML_DL_H_

#include <nvml.h>

/* Name suffix for the dynamically-loaded wrappers defined in nvml_dl.c. */
#define NVML_DL(x) x##_dl

extern nvmlReturn_t NVML_DL(nvmlInit)(void);
extern nvmlReturn_t NVML_DL(nvmlShutdown)(void);
extern nvmlReturn_t NVML_DL(nvmlDeviceGetTopologyCommonAncestor)(
    nvmlDevice_t, nvmlDevice_t, nvmlGpuTopologyLevel_t *);

#endif // _NVML_DL_H_
|
Loading…
Reference in New Issue