Containers: Use systemd-nspawn's --network-veth flag
Note that this causes the name of the host-side interface to change from c-<name> to ve-<name>.
This commit is contained in:
parent
810680bcae
commit
6f7aaf10a5
@ -213,8 +213,8 @@ $ ping -c1 10.233.4.2
|
|||||||
<para>Networking is implemented using a pair of virtual Ethernet
|
<para>Networking is implemented using a pair of virtual Ethernet
|
||||||
devices. The network interface in the container is called
|
devices. The network interface in the container is called
|
||||||
<literal>eth0</literal>, while the matching interface in the host is
|
<literal>eth0</literal>, while the matching interface in the host is
|
||||||
called <literal>c-<replaceable>container-name</replaceable></literal>
|
called <literal>ve-<replaceable>container-name</replaceable></literal>
|
||||||
(e.g., <literal>c-foo</literal>). The container has its own network
|
(e.g., <literal>ve-foo</literal>). The container has its own network
|
||||||
namespace and the <literal>CAP_NET_ADMIN</literal> capability, so it
|
namespace and the <literal>CAP_NET_ADMIN</literal> capability, so it
|
||||||
can perform arbitrary network configuration such as setting up
|
can perform arbitrary network configuration such as setting up
|
||||||
firewall rules, without affecting or having access to the host’s
|
firewall rules, without affecting or having access to the host’s
|
||||||
@ -228,11 +228,11 @@ on the host:
|
|||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
networking.nat.enable = true;
|
networking.nat.enable = true;
|
||||||
networking.nat.internalInterfaces = ["c-+"];
|
networking.nat.internalInterfaces = ["ve-+"];
|
||||||
networking.nat.externalInterface = "eth0";
|
networking.nat.externalInterface = "eth0";
|
||||||
</programlisting>
|
</programlisting>
|
||||||
where <literal>eth0</literal> should be replaced with the desired
|
where <literal>eth0</literal> should be replaced with the desired
|
||||||
external interface. Note that <literal>c-+</literal> is a wildcard
|
external interface. Note that <literal>ve-+</literal> is a wildcard
|
||||||
that matches all container interfaces.</para>
|
that matches all container interfaces.</para>
|
||||||
|
|
||||||
</section>
|
</section>
|
||||||
|
@ -4,6 +4,28 @@
|
|||||||
|
|
||||||
<title>Release notes</title>
|
<title>Release notes</title>
|
||||||
|
|
||||||
|
<!--==================================================================-->
|
||||||
|
|
||||||
|
<section xml:id="sec-release-14.10">
|
||||||
|
|
||||||
|
<title>Release 14.10 (“Caterpillar”, 2014/10/??)</title>
|
||||||
|
|
||||||
|
<para>When upgrading from a previous release, please be aware of the
|
||||||
|
following incompatible changes:
|
||||||
|
|
||||||
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem><para>The host side of a container virtual Ethernet pair
|
||||||
|
is now called <literal>ve-<replaceable>container-name</replaceable></literal>
|
||||||
|
rather than <literal>c-<replaceable>container-name</replaceable></literal>.</para></listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</section>
|
||||||
|
|
||||||
|
|
||||||
<!--==================================================================-->
|
<!--==================================================================-->
|
||||||
|
|
||||||
<section xml:id="sec-release-14.04">
|
<section xml:id="sec-release-14.04">
|
||||||
|
@ -34,9 +34,8 @@ let
|
|||||||
|
|
||||||
# Ignore peth* devices; on Xen, they're renamed physical
|
# Ignore peth* devices; on Xen, they're renamed physical
|
||||||
# Ethernet cards used for bridging. Likewise for vif* and tap*
|
# Ethernet cards used for bridging. Likewise for vif* and tap*
|
||||||
# (Xen) and virbr* and vnet* (libvirt) and c-* and ctmp-* (NixOS
|
# (Xen) and virbr* and vnet* (libvirt).
|
||||||
# containers).
|
denyinterfaces ${toString ignoredInterfaces} lo peth* vif* tap* tun* virbr* vnet* vboxnet*
|
||||||
denyinterfaces ${toString ignoredInterfaces} lo peth* vif* tap* tun* virbr* vnet* vboxnet* c-* ctmp-*
|
|
||||||
|
|
||||||
${config.networking.dhcpcd.extraConfig}
|
${config.networking.dhcpcd.extraConfig}
|
||||||
'';
|
'';
|
||||||
|
@ -4,16 +4,6 @@ with lib;
|
|||||||
|
|
||||||
let
|
let
|
||||||
|
|
||||||
runInNetns = pkgs.stdenv.mkDerivation {
|
|
||||||
name = "run-in-netns";
|
|
||||||
unpackPhase = "true";
|
|
||||||
buildPhase = ''
|
|
||||||
mkdir -p $out/bin
|
|
||||||
gcc ${./run-in-netns.c} -o $out/bin/run-in-netns
|
|
||||||
'';
|
|
||||||
installPhase = "true";
|
|
||||||
};
|
|
||||||
|
|
||||||
nixos-container = pkgs.substituteAll {
|
nixos-container = pkgs.substituteAll {
|
||||||
name = "nixos-container";
|
name = "nixos-container";
|
||||||
dir = "bin";
|
dir = "bin";
|
||||||
@ -23,6 +13,28 @@ let
|
|||||||
inherit (pkgs) socat;
|
inherit (pkgs) socat;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# The container's init script, a small wrapper around the regular
|
||||||
|
# NixOS stage-2 init script.
|
||||||
|
containerInit = pkgs.writeScript "container-init"
|
||||||
|
''
|
||||||
|
#! ${pkgs.stdenv.shell} -e
|
||||||
|
|
||||||
|
# Initialise the container side of the veth pair.
|
||||||
|
if [ "$PRIVATE_NETWORK" = 1 ]; then
|
||||||
|
ip link set host0 name eth0
|
||||||
|
ip link set dev eth0 up
|
||||||
|
if [ -n "$HOST_ADDRESS" ]; then
|
||||||
|
ip route add $HOST_ADDRESS dev eth0
|
||||||
|
ip route add default via $HOST_ADDRESS
|
||||||
|
fi
|
||||||
|
if [ -n "$LOCAL_ADDRESS" ]; then
|
||||||
|
ip addr add $LOCAL_ADDRESS dev eth0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
exec "$1"
|
||||||
|
'';
|
||||||
|
|
||||||
system = config.nixpkgs.system;
|
system = config.nixpkgs.system;
|
||||||
|
|
||||||
in
|
in
|
||||||
@ -70,7 +82,7 @@ in
|
|||||||
Whether to give the container its own private virtual
|
Whether to give the container its own private virtual
|
||||||
Ethernet interface. The interface is called
|
Ethernet interface. The interface is called
|
||||||
<literal>eth0</literal>, and is hooked up to the interface
|
<literal>eth0</literal>, and is hooked up to the interface
|
||||||
<literal>c-<replaceable>container-name</replaceable></literal>
|
<literal>ve-<replaceable>container-name</replaceable></literal>
|
||||||
on the host. If this option is not set, then the
|
on the host. If this option is not set, then the
|
||||||
container shares the network interfaces of the host,
|
container shares the network interfaces of the host,
|
||||||
and can bind to any port on any interface.
|
and can bind to any port on any interface.
|
||||||
@ -176,39 +188,8 @@ in
|
|||||||
"/nix/var/nix/profiles/per-container/$INSTANCE" \
|
"/nix/var/nix/profiles/per-container/$INSTANCE" \
|
||||||
"/nix/var/nix/gcroots/per-container/$INSTANCE"
|
"/nix/var/nix/gcroots/per-container/$INSTANCE"
|
||||||
|
|
||||||
if [ -f "/etc/containers/$INSTANCE.conf" ]; then
|
|
||||||
. "/etc/containers/$INSTANCE.conf"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Cleanup from last time.
|
|
||||||
ifaceHost=c-$INSTANCE
|
|
||||||
ifaceCont=ctmp-$INSTANCE
|
|
||||||
ns=net-$INSTANCE
|
|
||||||
ip netns del $ns 2> /dev/null || true
|
|
||||||
ip link del $ifaceHost 2> /dev/null || true
|
|
||||||
ip link del $ifaceCont 2> /dev/null || true
|
|
||||||
|
|
||||||
if [ "$PRIVATE_NETWORK" = 1 ]; then
|
if [ "$PRIVATE_NETWORK" = 1 ]; then
|
||||||
# Create a pair of virtual ethernet devices. On the host,
|
extraFlags="--network-veth"
|
||||||
# we get ‘c-<container-name>’, and on the guest, we get
|
|
||||||
# ‘eth0’.
|
|
||||||
ip link add $ifaceHost type veth peer name $ifaceCont
|
|
||||||
ip netns add $ns
|
|
||||||
ip link set $ifaceCont netns $ns
|
|
||||||
ip netns exec $ns ip link set $ifaceCont name eth0
|
|
||||||
ip netns exec $ns ip link set dev eth0 up
|
|
||||||
ip link set dev $ifaceHost up
|
|
||||||
if [ -n "$HOST_ADDRESS" ]; then
|
|
||||||
ip addr add $HOST_ADDRESS dev $ifaceHost
|
|
||||||
ip netns exec $ns ip route add $HOST_ADDRESS dev eth0
|
|
||||||
ip netns exec $ns ip route add default via $HOST_ADDRESS
|
|
||||||
fi
|
|
||||||
if [ -n "$LOCAL_ADDRESS" ]; then
|
|
||||||
ip netns exec $ns ip addr add $LOCAL_ADDRESS dev eth0
|
|
||||||
ip route add $LOCAL_ADDRESS dev $ifaceHost
|
|
||||||
fi
|
|
||||||
runInNetNs="${runInNetns}/bin/run-in-netns $ns"
|
|
||||||
extraFlags="--capability=CAP_NET_ADMIN"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# If the host is 64-bit and the container is 32-bit, add a
|
# If the host is 64-bit and the container is 32-bit, add a
|
||||||
@ -219,7 +200,7 @@ in
|
|||||||
fi
|
fi
|
||||||
''}
|
''}
|
||||||
|
|
||||||
exec $runInNetNs ${config.systemd.package}/bin/systemd-nspawn \
|
exec ${config.systemd.package}/bin/systemd-nspawn \
|
||||||
--keep-unit \
|
--keep-unit \
|
||||||
-M "$INSTANCE" -D "$root" $extraFlags \
|
-M "$INSTANCE" -D "$root" $extraFlags \
|
||||||
--bind-ro=/nix/store \
|
--bind-ro=/nix/store \
|
||||||
@ -227,7 +208,11 @@ in
|
|||||||
--bind-ro=/nix/var/nix/daemon-socket \
|
--bind-ro=/nix/var/nix/daemon-socket \
|
||||||
--bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
|
--bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
|
||||||
--bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
|
--bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
|
||||||
"''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
|
--setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
|
||||||
|
--setenv HOST_ADDRESS="$HOST_ADDRESS" \
|
||||||
|
--setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
|
||||||
|
--setenv PATH="$PATH" \
|
||||||
|
${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
|
||||||
'';
|
'';
|
||||||
|
|
||||||
postStart =
|
postStart =
|
||||||
@ -237,6 +222,17 @@ in
|
|||||||
# until the start timeout expires if systemd-nspawn exits.
|
# until the start timeout expires if systemd-nspawn exits.
|
||||||
read x < $root/var/lib/startup-done
|
read x < $root/var/lib/startup-done
|
||||||
rm -f $root/var/lib/startup-done
|
rm -f $root/var/lib/startup-done
|
||||||
|
|
||||||
|
if [ "$PRIVATE_NETWORK" = 1 ]; then
|
||||||
|
ifaceHost=ve-$INSTANCE
|
||||||
|
ip link set dev $ifaceHost up
|
||||||
|
if [ -n "$HOST_ADDRESS" ]; then
|
||||||
|
ip addr add $HOST_ADDRESS dev $ifaceHost
|
||||||
|
fi
|
||||||
|
if [ -n "$LOCAL_ADDRESS" ]; then
|
||||||
|
ip route add $LOCAL_ADDRESS dev $ifaceHost
|
||||||
|
fi
|
||||||
|
fi
|
||||||
'';
|
'';
|
||||||
|
|
||||||
preStop =
|
preStop =
|
||||||
@ -251,14 +247,13 @@ in
|
|||||||
''
|
''
|
||||||
#! ${pkgs.stdenv.shell} -e
|
#! ${pkgs.stdenv.shell} -e
|
||||||
SYSTEM_PATH=/nix/var/nix/profiles/system
|
SYSTEM_PATH=/nix/var/nix/profiles/system
|
||||||
if [ -f "/etc/containers/$INSTANCE.conf" ]; then
|
|
||||||
. "/etc/containers/$INSTANCE.conf"
|
|
||||||
fi
|
|
||||||
echo $SYSTEM_PATH/bin/switch-to-configuration test | \
|
echo $SYSTEM_PATH/bin/switch-to-configuration test | \
|
||||||
${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket -
|
${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket -
|
||||||
'';
|
'';
|
||||||
|
|
||||||
serviceConfig.SyslogIdentifier = "container %i";
|
serviceConfig.SyslogIdentifier = "container %i";
|
||||||
|
|
||||||
|
serviceConfig.EnvironmentFile = "-/etc/containers/%i.conf";
|
||||||
};
|
};
|
||||||
|
|
||||||
# Generate a configuration file in /etc/containers for each
|
# Generate a configuration file in /etc/containers for each
|
||||||
@ -288,6 +283,8 @@ in
|
|||||||
${cfg.localAddress} ${name}.containers
|
${cfg.localAddress} ${name}.containers
|
||||||
'') config.containers);
|
'') config.containers);
|
||||||
|
|
||||||
|
networking.dhcpcd.denyInterfaces = [ "ve-*" ];
|
||||||
|
|
||||||
environment.systemPackages = [ nixos-container ];
|
environment.systemPackages = [ nixos-container ];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -1,50 +0,0 @@
|
|||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <errno.h>
|
|
||||||
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <sched.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/mount.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <linux/limits.h>
|
|
||||||
|
|
||||||
/*
 * run-in-netns: join a named network namespace, then exec a command.
 *
 * Usage: run-in-netns <namespace> <program> [args...]
 *
 * Opens /run/netns/<namespace>, enters it with setns(), detaches and
 * unlinks the namespace file, and finally replaces this process with
 * <program> (argv[2]), passing argv[2..] as its argument vector.
 *
 * Returns 1 after printing a diagnostic to stderr if any step fails;
 * on success execv() does not return.
 */
int main(int argc, char * * argv)
{
    /* argv[0] may be NULL when argc == 0; never hand NULL to "%s". */
    const char * prog = (argc > 0 && argv[0]) ? argv[0] : "run-in-netns";

    if (argc < 3) {
        fprintf(stderr, "%s: missing arguments\n", prog);
        return 1;
    }

    char nsPath[PATH_MAX];

    /* Bounded formatting: the namespace name comes from the command
       line, so refuse names that would not fit instead of overflowing
       the buffer. */
    int len = snprintf(nsPath, sizeof nsPath, "/run/netns/%s", argv[1]);
    if (len < 0 || (size_t) len >= sizeof nsPath) {
        fprintf(stderr, "%s: network namespace name too long\n", prog);
        return 1;
    }

    int fd = open(nsPath, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "%s: opening network namespace: %s\n", prog, strerror(errno));
        return 1;
    }

    if (setns(fd, CLONE_NEWNET) == -1) {
        fprintf(stderr, "%s: setting network namespace: %s\n", prog, strerror(errno));
        close(fd);
        return 1;
    }

    /* The descriptor was only needed for setns(); close it so that it
       is not inherited by the command we exec below. */
    close(fd);

    /* Best effort: the result of the lazy unmount is deliberately not
       checked; a failure to remove the file afterwards is reported. */
    umount2(nsPath, MNT_DETACH);
    if (unlink(nsPath) == -1) {
        fprintf(stderr, "%s: unlinking network namespace: %s\n", prog, strerror(errno));
        return 1;
    }

    /* FIXME: Remount /sys so that /sys/class/net reflects the
       interfaces visible in the network namespace.  This requires
       bind-mounting /sys/fs/cgroups etc. */

    execv(argv[2], argv + 2);

    /* Only reached if execv() failed. */
    fprintf(stderr, "%s: running command: %s\n", prog, strerror(errno));
    return 1;
}
|
|
Loading…
x
Reference in New Issue
Block a user