mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-11-16 23:03:40 +01:00
Add support for running a container with a private network interface
For example, the following sets up a container named ‘foo’. The container will have a single network interface eth0, with IP address 10.231.136.2. The host will have an interface c-foo with IP address 10.231.136.1. systemd.containers.foo = { privateNetwork = true; hostAddress = "10.231.136.1"; localAddress = "10.231.136.2"; config = { services.openssh.enable = true; }; }; With ‘privateNetwork = true’, the container has the CAP_NET_ADMIN capability, allowing it to do arbitrary network configuration, such as setting up firewall rules. This is secure because it cannot touch the interfaces of the host. The helper program ‘run-in-netns’ is needed at the moment because ‘ip netns exec’ doesn't quite do the right thing (it remounts /sys without bind-mounting the original /sys/fs/cgroup).
This commit is contained in:
parent
ac215779dd
commit
895bcdd1cb
3 changed files with 155 additions and 13 deletions
|
@ -34,8 +34,9 @@ let
|
|||
|
||||
# Ignore peth* devices; on Xen, they're renamed physical
|
||||
# Ethernet cards used for bridging. Likewise for vif* and tap*
|
||||
# (Xen) and virbr* and vnet* (libvirt).
|
||||
denyinterfaces ${toString ignoredInterfaces} peth* vif* tap* tun* virbr* vnet* vboxnet*
|
||||
# (Xen) and virbr* and vnet* (libvirt) and c-* and ctmp-* (NixOS
|
||||
# containers).
|
||||
denyinterfaces ${toString ignoredInterfaces} peth* vif* tap* tun* virbr* vnet* vboxnet* c-* ctmp-*
|
||||
|
||||
${config.networking.dhcpcd.extraConfig}
|
||||
'';
|
||||
|
|
|
@ -2,6 +2,20 @@
|
|||
|
||||
with pkgs.lib;
|
||||
|
||||
let
|
||||
|
||||
# Helper program built from ./run-in-netns.c.  The container unit
# prefixes its systemd-nspawn command line with this binary and the
# namespace name when privateNetwork is enabled.
# There is no source archive, so unpackPhase is a no-op ("true");
# buildPhase compiles straight into $out/bin, so installPhase is a
# no-op as well.
runInNetns = pkgs.stdenv.mkDerivation {
|
||||
name = "run-in-netns";
|
||||
unpackPhase = "true";
|
||||
buildPhase = ''
|
||||
mkdir -p $out/bin
|
||||
gcc ${./run-in-netns.c} -o $out/bin/run-in-netns
|
||||
'';
|
||||
installPhase = "true";
|
||||
};
|
||||
|
||||
in
|
||||
|
||||
{
|
||||
options = {
|
||||
|
||||
|
@ -45,6 +59,39 @@ with pkgs.lib;
|
|||
'';
|
||||
};
|
||||
|
||||
# When true, the container unit's preStart creates a veth pair
# (c-<name> on the host, renamed to eth0 in the container) inside a
# fresh network namespace, and systemd-nspawn is started through
# run-in-netns with --capability=CAP_NET_ADMIN.
privateNetwork = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to give the container its own private virtual
|
||||
Ethernet interface. The interface is called
|
||||
<literal>eth0</literal>, and is hooked up to the interface
|
||||
<literal>c-<replaceable>container-name</replaceable></literal>
|
||||
on the host. If this option is not set, then the
|
||||
container shares the network interfaces of the host,
|
||||
and can bind to any port on any interface.
|
||||
'';
|
||||
};
|
||||
|
||||
# Only takes effect when privateNetwork is enabled: the address is
# added to the host-side c-<name> interface, and the container gets
# a route to it plus a default route via it.  null (the default)
# skips that setup.
hostAddress = mkOption {
|
||||
type = types.nullOr types.string;
|
||||
default = null;
|
||||
example = "10.231.136.1";
|
||||
description = ''
|
||||
The IPv4 address assigned to the host interface.
|
||||
'';
|
||||
};
|
||||
|
||||
# The address is only configured on eth0 (and routed from the host
# via c-<name>) when privateNetwork is enabled.  Independently of
# that, a non-null value also produces a "<name>.containers" entry
# in networking.extraHosts.
localAddress = mkOption {
|
||||
type = types.nullOr types.string;
|
||||
default = null;
|
||||
example = "10.231.136.2";
|
||||
description = ''
|
||||
The IPv4 address assigned to <literal>eth0</literal>
|
||||
in the container.
|
||||
'';
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
config = mkMerge
|
||||
|
@ -97,32 +144,70 @@ with pkgs.lib;
|
|||
|
||||
config = {
|
||||
|
||||
systemd.services = mapAttrs' (name: container: nameValuePair "container-${name}"
|
||||
{ description = "Container '${name}'";
|
||||
systemd.services = mapAttrs' (name: cfg:
|
||||
let
|
||||
# FIXME: interface names have a maximum length.
|
||||
ifaceHost = "c-${name}";
|
||||
ifaceCont = "ctmp-${name}";
|
||||
ns = "net-${name}";
|
||||
in
|
||||
nameValuePair "container-${name}" {
|
||||
description = "Container '${name}'";
|
||||
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
|
||||
unitConfig.RequiresMountsFor = [ container.root ];
|
||||
unitConfig.RequiresMountsFor = [ cfg.root ];
|
||||
|
||||
path = [ pkgs.iproute ];
|
||||
|
||||
preStart =
|
||||
''
|
||||
mkdir -p -m 0755 ${container.root}/etc
|
||||
if ! [ -e ${container.root}/etc/os-release ]; then
|
||||
touch ${container.root}/etc/os-release
|
||||
mkdir -p -m 0755 ${cfg.root}/etc
|
||||
if ! [ -e ${cfg.root}/etc/os-release ]; then
|
||||
touch ${cfg.root}/etc/os-release
|
||||
fi
|
||||
|
||||
mkdir -p -m 0755 \
|
||||
/nix/var/nix/profiles/per-container/${name} \
|
||||
/nix/var/nix/gcroots/per-container/${name}
|
||||
''
|
||||
|
||||
+ optionalString cfg.privateNetwork ''
|
||||
# Cleanup from last time.
|
||||
ip netns del ${ns} 2> /dev/null || true
|
||||
ip link del ${ifaceHost} 2> /dev/null || true
|
||||
ip link del ${ifaceCont} 2> /dev/null || true
|
||||
|
||||
# Create a pair of virtual ethernet devices. On the host,
|
||||
# we get ‘c-<container-name>’, and on the guest, we get
|
||||
# ‘eth0’.
|
||||
set -x
|
||||
ip link add ${ifaceHost} type veth peer name ${ifaceCont}
|
||||
ip netns add ${ns}
|
||||
ip link set ${ifaceCont} netns ${ns}
|
||||
ip netns exec ${ns} ip link set ${ifaceCont} name eth0
|
||||
ip netns exec ${ns} ip link set dev eth0 up
|
||||
ip link set dev ${ifaceHost} up
|
||||
${optionalString (cfg.hostAddress != null) ''
|
||||
ip addr add ${cfg.hostAddress} dev ${ifaceHost}
|
||||
ip netns exec ${ns} ip route add ${cfg.hostAddress} dev eth0
|
||||
ip netns exec ${ns} ip route add default via ${cfg.hostAddress}
|
||||
''}
|
||||
${optionalString (cfg.localAddress != null) ''
|
||||
ip netns exec ${ns} ip addr add ${cfg.localAddress} dev eth0
|
||||
ip route add ${cfg.localAddress} dev ${ifaceHost}
|
||||
''}
|
||||
'';
|
||||
|
||||
serviceConfig.ExecStart =
|
||||
"${config.systemd.package}/bin/systemd-nspawn"
|
||||
+ " -M ${name} -D ${container.root}"
|
||||
(optionalString cfg.privateNetwork "${runInNetns}/bin/run-in-netns ${ns} ")
|
||||
+ "${config.systemd.package}/bin/systemd-nspawn"
|
||||
+ (optionalString cfg.privateNetwork " --capability=CAP_NET_ADMIN")
|
||||
+ " -M ${name} -D ${cfg.root}"
|
||||
+ " --bind-ro=/nix/store --bind-ro=/nix/var/nix/db --bind-ro=/nix/var/nix/daemon-socket"
|
||||
+ " --bind=/nix/var/nix/profiles/per-container/${name}:/nix/var/nix/profiles"
|
||||
+ " --bind=/nix/var/nix/gcroots/per-container/${name}:/nix/var/nix/gcroots"
|
||||
+ " ${container.path}/init";
|
||||
+ " ${cfg.path}/init";
|
||||
|
||||
preStop =
|
||||
''
|
||||
|
@ -146,10 +231,16 @@ with pkgs.lib;
|
|||
|
||||
serviceConfig.ExecReload =
|
||||
"${pkgs.bash}/bin/bash -c '"
|
||||
+ "echo ${container.path}/bin/switch-to-configuration test "
|
||||
+ "| ${pkgs.socat}/bin/socat unix:${container.root}/var/lib/root-shell.socket -'";
|
||||
+ "echo ${cfg.path}/bin/switch-to-configuration test "
|
||||
+ "| ${pkgs.socat}/bin/socat unix:${cfg.root}/var/lib/root-shell.socket -'";
|
||||
|
||||
}) config.systemd.containers;
|
||||
|
||||
# Generate /etc/hosts entries for the containers.
|
||||
networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
|
||||
''
|
||||
${cfg.localAddress} ${name}.containers
|
||||
'') config.systemd.containers);
|
||||
|
||||
};
|
||||
}
|
||||
|
|
50
nixos/modules/virtualisation/run-in-netns.c
Normal file
50
nixos/modules/virtualisation/run-in-netns.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mount.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/limits.h>
|
||||
|
||||
/*
 * Usage: run-in-netns NAME PROGRAM [ARGS...]
 *
 * Joins the network namespace registered at /run/netns/NAME, removes
 * that registration, and then replaces this process with PROGRAM
 * (argv[2], given as a path: execv() performs no PATH lookup), passing
 * it the remaining arguments.
 *
 * Returns 1 with a message on stderr if the arguments are missing, the
 * namespace name is too long, or any of open/setns/unlink/execv fails.
 * On success it does not return.
 */
int main(int argc, char * * argv)
{
    if (argc < 3) {
        fprintf(stderr, "%s: missing arguments\n", argv[0]);
        return 1;
    }

    char nsPath[PATH_MAX];

    /* argv[1] is caller-controlled and may be longer than PATH_MAX, so
       format with a bound and refuse a truncated path rather than
       overflowing the stack buffer. */
    int len = snprintf(nsPath, sizeof(nsPath), "/run/netns/%s", argv[1]);
    if (len < 0 || (size_t) len >= sizeof(nsPath)) {
        fprintf(stderr, "%s: network namespace name too long\n", argv[0]);
        return 1;
    }

    /* O_CLOEXEC: the descriptor is only needed for setns() and must not
       be inherited by the program we exec. */
    int fd = open(nsPath, O_RDONLY | O_CLOEXEC);
    if (fd == -1) {
        fprintf(stderr, "%s: opening network namespace: %s\n", argv[0], strerror(errno));
        return 1;
    }

    if (setns(fd, CLONE_NEWNET) == -1) {
        fprintf(stderr, "%s: setting network namespace: %s\n", argv[0], strerror(errno));
        close(fd);
        return 1;
    }

    /* We are in the namespace now; the descriptor has served its purpose. */
    close(fd);

    /* Remove the /run/netns entry now that we have joined the namespace.
       The unmount is best-effort (its result is deliberately ignored);
       failing to unlink the path is treated as fatal.
       NOTE(review): presumably this lets the namespace disappear once
       the last process in it exits -- confirm. */
    umount2(nsPath, MNT_DETACH);
    if (unlink(nsPath) == -1) {
        fprintf(stderr, "%s: unlinking network namespace: %s\n", argv[0], strerror(errno));
        return 1;
    }

    /* FIXME: Remount /sys so that /sys/class/net reflects the
       interfaces visible in the network namespace.  This requires
       bind-mounting /sys/fs/cgroup etc. */

    execv(argv[2], argv + 2);

    /* execv() only returns on failure. */
    fprintf(stderr, "%s: running command: %s\n", argv[0], strerror(errno));
    return 1;
}
|
Loading…
Reference in a new issue