Skip to content

Instantly share code, notes, and snippets.

@AkihiroSuda
Created August 14, 2018 02:24
Show Gist options
  • Save AkihiroSuda/889b29c56a3c9b546a843fb03f11d60a to your computer and use it in GitHub Desktop.
Save AkihiroSuda/889b29c56a3c9b546a843fb03f11d60a to your computer and use it in GitHub Desktop.
[PATCH] rootless: make /sys/fs/cgroup/* read-only
From 74deace73955864d9bb2a25e0c5b44612f23e627 Mon Sep 17 00:00:00 2001
From: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
Date: Tue, 14 Aug 2018 10:45:20 +0900
Subject: [PATCH] rootless: make /sys/fs/cgroup/* read-only
The default rootless spec bind-mounts /sys with "rbind,ro" (because mounting sysfs
requires netns to be unshared), however, it does not make subfilesystems mounted
under /sys read-only.
So, files under /sys/fs/cgroup were unexpectedly writable when they are chmod/chowned
via privileged helpers such as pam_cgfs.
This patch fixes the issue by mounting /sys/fs/cgroup/* as read-only explicitly.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
---
libcontainer/rootfs_linux.go | 12 ++++++
libcontainer/specconv/example.go | 71 ++++++++++++++++++++++++--------
tests/integration/mounts.bats | 9 ++++
3 files changed, 75 insertions(+), 17 deletions(-)
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 7f852efc..c16cbd73 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -298,6 +298,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
return err
}
+ // Add a dummy directory for cgroup2.
+ // Without this hack, `df` fails with `df: /sys/fs/cgroup/unified: No such file or directory`
+ // when /sys is bind-mounted from the host to the container (typically when netns is not unshared),
+ if err := os.MkdirAll(filepath.Join(rootfs, m.Destination, "unified"), 0755); err != nil {
+ return err
+ }
for _, b := range binds {
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
return err
@@ -361,6 +367,12 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
var binds []*configs.Mount
for _, mm := range mounts {
+ // when /sys is bind-mounted from the host to the container (typically when netns is not unshared),
+ // mm.Mountpoint can be like "/containers/foo/rootfs/sys/fs/cgroup/systemd/user.slice/user-1001.slice/session-1.scope/foo".
+ //
+ if !strings.HasPrefix(mm.Mountpoint, "/sys/fs/cgroup") {
+ continue
+ }
dir, err := mm.GetOwnCgroup(cgroupPaths)
if err != nil {
return nil, err
diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go
index c113b337..4c809a1e 100644
--- a/libcontainer/specconv/example.go
+++ b/libcontainer/specconv/example.go
@@ -162,6 +162,10 @@ func ToRootless(spec *specs.Spec) {
var namespaces []specs.LinuxNamespace
// Remove networkns from the spec.
+ //
+ // TODO: removal of networkns should be optional,
+ // because networkns can be made useful with usermode network stack
+ // such as slirp4netns or VPNKit.
for _, ns := range spec.Linux.Namespaces {
switch ns.Type {
case specs.NetworkNamespace, specs.UserNamespace:
@@ -189,13 +193,9 @@ func ToRootless(spec *specs.Spec) {
}}
// Fix up mounts.
- var mounts []specs.Mount
- for _, mount := range spec.Mounts {
- // Ignore all mounts that are under /sys.
- if strings.HasPrefix(mount.Destination, "/sys") {
- continue
- }
-
+ fixupRootlessBindSys(spec)
+ for i := range spec.Mounts {
+ mount := &spec.Mounts[i]
// Remove all gid= and uid= mappings.
var options []string
for _, option := range mount.Options {
@@ -203,19 +203,56 @@ func ToRootless(spec *specs.Spec) {
options = append(options, option)
}
}
-
mount.Options = options
- mounts = append(mounts, mount)
}
- // Add the sysfs mount as an rbind.
- mounts = append(mounts, specs.Mount{
- Source: "/sys",
- Destination: "/sys",
- Type: "none",
- Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
- })
- spec.Mounts = mounts
// Remove cgroup settings.
spec.Linux.Resources = nil
}
+
+// fixupRootlessBindSys bind-mounts /sys.
+// This is required when netns is not unshared.
+func fixupRootlessBindSys(spec *specs.Spec) {
+ // Fix up mounts.
+ var mounts []specs.Mount
+ for _, mount := range spec.Mounts {
+ // Ignore all mounts that are under /sys.
+ if strings.HasPrefix(mount.Destination, "/sys") {
+ continue
+ }
+ mounts = append(mounts, mount)
+ }
+ mounts = append(mounts, []specs.Mount{
+ // Add the sysfs mount as an rbind.
+ // Note:
+ // * "ro" does not make submounts read-only recursively.
+ // * rbind work for sysfs but bind does not.
+ {
+ Source: "/sys",
+ Destination: "/sys",
+ Type: "none",
+ Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
+ },
+ // Add cgroup mount so as to make them read-only
+ {
+ Destination: "/sys/fs/cgroup",
+ Type: "cgroup",
+ Source: "cgroup",
+ Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
+ },
+ }...)
+ spec.Mounts = mounts
+ var maskedPaths []string
+ for _, masked := range spec.Linux.MaskedPaths {
+ // Ignore masked paths that are under /sys. (should be safe for rootless mode)
+ //
+ // Without this hack, `df` fails with "df: /sys/firmware/efi/efivars"
+ // because the efivars entry exists in mtab (because /sys is rbind-mounted)
+ // but /sys/firmware is masked.
+ if strings.HasPrefix(masked, "/sys") {
+ continue
+ }
+ maskedPaths = append(maskedPaths, masked)
+ }
+ spec.Linux.MaskedPaths = maskedPaths
+}
diff --git a/tests/integration/mounts.bats b/tests/integration/mounts.bats
index c35b3c5f..1e07ba2e 100755
--- a/tests/integration/mounts.bats
+++ b/tests/integration/mounts.bats
@@ -19,3 +19,12 @@ function teardown() {
[ "$status" -eq 0 ]
[[ "${lines[0]}" =~ '/tmp/bind/config.json' ]]
}
+
+# make sure there is no "df: /foo/bar: No such file or directory" issue in the default configuration
+@test "runc run [df]" {
+ CONFIG=$(jq '.process.args = ["df"]' config.json)
+ echo "${CONFIG}" >config.json
+
+ runc run test_df
+ [ "$status" -eq 0 ]
+}
--
2.17.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment