Created
August 14, 2018 02:24
-
-
Save AkihiroSuda/889b29c56a3c9b546a843fb03f11d60a to your computer and use it in GitHub Desktop.
[PATCH] rootless: make /sys/fs/cgroup/* read-only
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 74deace73955864d9bb2a25e0c5b44612f23e627 Mon Sep 17 00:00:00 2001 | |
From: Akihiro Suda <suda.akihiro@lab.ntt.co.jp> | |
Date: Tue, 14 Aug 2018 10:45:20 +0900 | |
Subject: [PATCH] rootless: make /sys/fs/cgroup/* read-only | |
The default rootless spec bind-mounts /sys with "rbind,ro" (because mounting sysfs | |
requires netns to be unshared), however, it does not make subfilesystems mounted | |
under /sys read-only. | |
So, files under /sys/fs/cgroup were unexpectedly writable when they are chmod/chowned | |
via privileged helpers such as pam_cgfs. | |
This patch fixes the issue by mounting /sys/fs/cgroup/* as read-only explicitly. | |
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp> | |
--- | |
libcontainer/rootfs_linux.go | 12 ++++++ | |
libcontainer/specconv/example.go | 71 ++++++++++++++++++++++++-------- | |
tests/integration/mounts.bats | 9 ++++ | |
3 files changed, 75 insertions(+), 17 deletions(-) | |
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go | |
index 7f852efc..c16cbd73 100644 | |
--- a/libcontainer/rootfs_linux.go | |
+++ b/libcontainer/rootfs_linux.go | |
@@ -298,6 +298,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { | |
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil { | |
return err | |
} | |
+ // Add a dummy directory for cgroup2. | |
+ // Without this hack, `df` fails with `df: /sys/fs/cgroup/unified: No such file or directory` | |
+ // when /sys is bind-mounted from the host to the container (typically when netns is not unshared), | |
+ if err := os.MkdirAll(filepath.Join(rootfs, m.Destination, "unified"), 0755); err != nil { | |
+ return err | |
+ } | |
for _, b := range binds { | |
if err := mountToRootfs(b, rootfs, mountLabel); err != nil { | |
return err | |
@@ -361,6 +367,12 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { | |
var binds []*configs.Mount | |
for _, mm := range mounts { | |
+ // when /sys is bind-mounted from the host to the container (typically when netns is not unshared), | |
+ // mm.Mountpoint can be like "/containers/foo/rootfs/sys/fs/cgroup/systemd/user.slice/user-1001.slice/session-1.scope/foo". | |
+ // | |
+ if !strings.HasPrefix(mm.Mountpoint, "/sys/fs/cgroup") { | |
+ continue | |
+ } | |
dir, err := mm.GetOwnCgroup(cgroupPaths) | |
if err != nil { | |
return nil, err | |
diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go | |
index c113b337..4c809a1e 100644 | |
--- a/libcontainer/specconv/example.go | |
+++ b/libcontainer/specconv/example.go | |
@@ -162,6 +162,10 @@ func ToRootless(spec *specs.Spec) { | |
var namespaces []specs.LinuxNamespace | |
// Remove networkns from the spec. | |
+ // | |
+ // TODO: removal of networkns should be optional, | |
+ // because networkns can be made useful with usermode network stack | |
+ // such as slirp4netns or VPNKit. | |
for _, ns := range spec.Linux.Namespaces { | |
switch ns.Type { | |
case specs.NetworkNamespace, specs.UserNamespace: | |
@@ -189,13 +193,9 @@ func ToRootless(spec *specs.Spec) { | |
}} | |
// Fix up mounts. | |
- var mounts []specs.Mount | |
- for _, mount := range spec.Mounts { | |
- // Ignore all mounts that are under /sys. | |
- if strings.HasPrefix(mount.Destination, "/sys") { | |
- continue | |
- } | |
- | |
+ fixupRootlessBindSys(spec) | |
+ for i := range spec.Mounts { | |
+ mount := &spec.Mounts[i] | |
// Remove all gid= and uid= mappings. | |
var options []string | |
for _, option := range mount.Options { | |
@@ -203,19 +203,56 @@ func ToRootless(spec *specs.Spec) { | |
options = append(options, option) | |
} | |
} | |
- | |
mount.Options = options | |
- mounts = append(mounts, mount) | |
} | |
- // Add the sysfs mount as an rbind. | |
- mounts = append(mounts, specs.Mount{ | |
- Source: "/sys", | |
- Destination: "/sys", | |
- Type: "none", | |
- Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, | |
- }) | |
- spec.Mounts = mounts | |
// Remove cgroup settings. | |
spec.Linux.Resources = nil | |
} | |
+ | |
+// fixupRootlessBindSys bind-mounts /sys. | |
+// This is required when netns is not unshared. | |
+func fixupRootlessBindSys(spec *specs.Spec) { | |
+ // Fix up mounts. | |
+ var mounts []specs.Mount | |
+ for _, mount := range spec.Mounts { | |
+ // Ignore all mounts that are under /sys. | |
+ if strings.HasPrefix(mount.Destination, "/sys") { | |
+ continue | |
+ } | |
+ mounts = append(mounts, mount) | |
+ } | |
+ mounts = append(mounts, []specs.Mount{ | |
+ // Add the sysfs mount as an rbind. | |
+ // Note: | |
+ // * "ro" does not make submounts read-only recursively. | |
+ // * rbind work for sysfs but bind does not. | |
+ { | |
+ Source: "/sys", | |
+ Destination: "/sys", | |
+ Type: "none", | |
+ Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, | |
+ }, | |
+ // Add cgroup mount so as to make them read-only | |
+ { | |
+ Destination: "/sys/fs/cgroup", | |
+ Type: "cgroup", | |
+ Source: "cgroup", | |
+ Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, | |
+ }, | |
+ }...) | |
+ spec.Mounts = mounts | |
+ var maskedPaths []string | |
+ for _, masked := range spec.Linux.MaskedPaths { | |
+ // Ignore masked paths that are under /sys. (should be safe for rootless mode) | |
+ // | |
+ // Without this hack, `df` fails with "df: /sys/firmware/efi/efivars" | |
+ // because the efivars entry exists in mtab (because /sys is rbind-mounted) | |
+ // but /sys/firmware is masked. | |
+ if strings.HasPrefix(masked, "/sys") { | |
+ continue | |
+ } | |
+ maskedPaths = append(maskedPaths, masked) | |
+ } | |
+ spec.Linux.MaskedPaths = maskedPaths | |
+} | |
diff --git a/tests/integration/mounts.bats b/tests/integration/mounts.bats | |
index c35b3c5f..1e07ba2e 100755 | |
--- a/tests/integration/mounts.bats | |
+++ b/tests/integration/mounts.bats | |
@@ -19,3 +19,12 @@ function teardown() { | |
[ "$status" -eq 0 ] | |
[[ "${lines[0]}" =~ '/tmp/bind/config.json' ]] | |
} | |
+ | |
+# make sure there is no "df: /foo/bar: No such file or directory" issue in the default configuration | |
+@test "runc run [df]" { | |
+ CONFIG=$(jq '.process.args = ["df"]' config.json) | |
+ echo "${CONFIG}" >config.json | |
+ | |
+ runc run test_df | |
+ [ "$status" -eq 0 ] | |
+} | |
-- | |
2.17.1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment