Skip to content

Instantly share code, notes, and snippets.

@chussenot
Forked from sczizzo/rocker.rb
Created June 13, 2018 21:25
Show Gist options
  • Save chussenot/467d62760cd7092598fe15f6d24db081 to your computer and use it in GitHub Desktop.
Save chussenot/467d62760cd7092598fe15f6d24db081 to your computer and use it in GitHub Desktop.
Toy container runtime in Ruby
#!/usr/bin/env ruby
# Dependencies:
# - capsh
# - cgroup-utils
# - systemd
# - ruby
#
require 'fileutils'
require 'logger'
require 'optparse'
require 'ostruct'
require 'tmpdir'
require 'securerandom'
require 'shellwords'
Thread.abort_on_exception = true
# Command-line handling: turns ARGV into an OpenStruct of runtime settings.
#
# Expected invocation: [OPTIONS] IMAGE CONTAINER [-- COMMAND]
class Options
  class Invalid < StandardError; end

  # Severity names accepted by --level (looked up as constants on Logger).
  LOG_LEVELS = %w[DEBUG INFO WARN ERROR FATAL UNKNOWN].freeze

  # Parses +args+ and returns the populated options object.
  #
  # @param args [Array<String>] raw command-line arguments; may be consumed
  #   destructively by OptionParser#parse!
  # @param logger [Logger] its level is set from the parsed --level value
  # @return [OpenStruct] settings, including a generated +id+ and a
  #   +hostname+ that defaults to the container name
  # @raise [OptionParser::ParseError] on missing/invalid arguments
  def self.parse(args, logger)
    options = OpenStruct.new
    options.cap_set = []
    options.cap_drop = []
    options.cgroups = %w[
      blkio cpu cpuset devices freezer hugetlb memory net_prio perf_event pids
    ]
    options.command = ['/sbin/init']
    options.container = nil
    options.log_level = Logger::INFO
    options.environment = { 'PATH' => '/usr/sbin:/usr/bin:/sbin:/bin' }
    options.hostname = nil
    options.image = nil
    options.namespaces = %w[ipc mount pid uts] # net, user
    options.resources = []
    options.user = nil
    options.volumes = []

    # Everything after `--` is the container command and must be removed
    # before OptionParser sees it.
    args = parse_command! options, args
    parser(options).parse! args
    parse_positional! options, args

    logger.level = options.log_level
    options.id = "#{options.container}-#{SecureRandom.hex}"
    options.hostname ||= options.container
    options
  end

  # Splits +args+ at the first `--`; the tail becomes options.command.
  #
  # @return [Array<String>] the arguments preceding `--` (or all of +args+
  #   when there is no `--`)
  # @raise [OptionParser::MissingArgument] when `--` is given with nothing
  #   after it
  def self.parse_command!(options, args)
    dash_idx = args.index('--')
    if dash_idx
      options.command = args.drop(dash_idx + 1)
      args = args.take(dash_idx)
    end
    raise OptionParser::MissingArgument, 'command' if options.command.empty?

    args
  end

  # Assigns the two positional arguments (IMAGE, CONTAINER) left over after
  # option parsing.
  #
  # @raise [OptionParser::MissingArgument] when fewer than two remain
  # @raise [OptionParser::InvalidArgument] when more than two remain
  def self.parse_positional!(options, args)
    case args.size
    when 0
      raise OptionParser::MissingArgument, 'image'
    when 1
      raise OptionParser::MissingArgument, 'container'
    when 2
      options.image, options.container = args
    else
      # Name the surplus arguments so the error message is actionable.
      raise OptionParser::InvalidArgument, args.drop(2).join(' ')
    end
  end

  # Converts one `SRC[:DEST[:OPTS]]` volume spec into the hash consumed by
  # the mount helpers. DEST defaults to SRC when omitted *or empty*
  # (`SRC::OPTS`). OPTS is a comma-separated list of `key` / `key=value`
  # items; empty items are ignored.
  #
  # @param spec [String]
  # @return [Hash] { device:, dir:, opts: { String => String|nil } }
  def self.parse_volume(spec)
    src, dest, raw_opts = spec.split(':', 3)
    dest = src if dest.nil? || dest.empty?
    pairs = (raw_opts || '').split(',').reject(&:empty?).map do |item|
      key, value = item.split('=', 2)
      [key, value]
    end
    { device: src, dir: dest, opts: pairs.to_h }
  end

  # Builds the OptionParser that writes into +options+.
  #
  # The cgroup, environment and namespace switches *replace* the defaults
  # the first time they are used and accumulate afterwards; the remaining
  # array switches append to the (initially empty) defaults.
  #
  # @param options [OpenStruct] pre-populated with defaults by .parse
  # @return [OptionParser]
  def self.parser(options)
    overwrite_cgroups = nil
    overwrite_environment = nil
    overwrite_namespaces = nil

    OptionParser.new do |opts|
      opts.banner = "Usage: #{__FILE__} [OPTIONS] IMAGE CONTAINER [-- COMMAND]"
      opts.separator ''
      opts.separator 'Options:'

      opts.on '-c', '--cgroup CGROUP', 'CGroups (array)' do |cgroup|
        overwrite_cgroups ||= []
        overwrite_cgroups << cgroup
        options[:cgroups] = overwrite_cgroups.uniq
      end

      opts.on '-d', '--drop-cap CAPABILITY', 'Drop capabilities (array)' do |cap|
        options[:cap_drop] << cap
        options[:cap_drop].uniq!
      end

      opts.on '-e', '--environment ENV_VAR', 'Environment (array)' do |env_var|
        # A bare `KEY` (no `=`) is stored with a nil value.
        key, value = env_var.split '=', 2
        overwrite_environment ||= {}
        overwrite_environment[key] = value
        options[:environment] = overwrite_environment
      end

      opts.on '-h', '--hostname NAME', 'Hostname' do |name|
        options[:hostname] = name
      end

      opts.on '-l', '--level LEVEL', 'Log level' do |level|
        # Restrict the lookup to real severities; an unrestricted const_get
        # would raise NameError or resolve unrelated Logger constants.
        severity = level.upcase
        raise OptionParser::InvalidArgument, level unless LOG_LEVELS.include?(severity)

        options[:log_level] = Logger.const_get severity
      end

      opts.on '-n', '--namespace NAMESPACE', 'Namespaces (array)' do |namespace|
        overwrite_namespaces ||= []
        overwrite_namespaces << namespace
        options[:namespaces] = overwrite_namespaces.uniq
      end

      opts.on '-r', '--resource RESOURCE', 'Set resource (array)' do |resource|
        options[:resources] << resource
        options[:resources].uniq!
      end

      opts.on '-s', '--set-cap CAPABILITY', 'Set capabilities (array)' do |cap|
        options[:cap_set] << cap
        options[:cap_set].uniq!
      end

      opts.on '-u', '--user USER', 'Set user' do |user|
        options[:user] = user
      end

      opts.on '-v', '--volume SRC[:DEST[:OPTS]]', 'Volumes (array)' do |volume|
        # Parsing lives in a helper so this handler no longer reassigns the
        # enclosing `opts` (the OptionParser instance).
        options[:volumes] << parse_volume(volume)
      end
    end
  end
end
# Thin wrappers that build and run mount(8)/umount(8) command lines.
class Mount
  # Renders an option hash as the comma-separated string given to `mount -o`.
  # Keys with a nil value are emitted bare (`ro`), others as `key=value`.
  # Each key and value is passed through Shellwords.escape.
  #
  # @param opts [Hash]
  # @return [String]
  def self.format_opts(opts = {})
    opts.map do |key, value|
      [key, value].compact.map { |part| Shellwords.escape(part) }.join('=')
    end.join(',')
  end

  # Builds the argv for a mount invocation.
  #
  # @param device [String] source device/path
  # @param dir [String] mount point
  # @param type [String, Symbol, nil] filesystem type for `-t`
  # @param opts [Hash, nil] options for `-o`; nil or empty omits the flag
  # @param args [Array<String>] extra flags placed right after `mount`
  # @return [Array] argv, not yet shell-joined
  def self.format_command(device:, dir:, type: nil, opts: nil, args: [])
    command = ['mount']
    command += args
    command += ['-t', type] if type
    # `opts` defaults to nil, so guard before asking whether it has entries.
    command += ['-o', format_opts(opts)] if opts && opts.any?
    command += [device, dir]
    command
  end

  # Lazily unmounts +dir+ (`umount -l`).
  #
  # @return [Boolean, nil] the result of Kernel#system
  def self.unmount(dir:)
    system Shellwords.join(['umount', '-l', dir])
  end

  # Runs mount. When a block is given it is yielded +dir+ and the mount point
  # is unmounted afterwards, even if the block raises.
  #
  # The exit status of the mount command is not checked.
  #
  # @return [String] +dir+
  def self.mount(device:, dir:, type: nil, opts: nil, args: [])
    # Pass real keywords (a braced hash is a positional argument on Ruby 3+)
    # and forward +args+, which was previously accepted but dropped.
    mount_command = format_command(
      device: device, dir: dir, type: type, opts: opts, args: args
    )
    system Shellwords.join(mount_command)
    if block_given?
      begin
        yield dir
      ensure
        unmount dir: dir
      end
    end
    dir
  end

  # Mounts an overlay filesystem with +root+ as the lower layer and
  # upper/work directories inside a fresh temporary directory, then calls
  # +block+ with the merged mount point. The overlay is unmounted and the
  # temporary directory removed when the block finishes.
  #
  # @param root [String] path used as `lowerdir`
  def self.overlay(root, &block)
    Dir.mktmpdir do |tmp|
      dirs = {
        lowerdir: root,
        upperdir: File.join(tmp, 'upper'),
        workdir: File.join(tmp, 'work'),
        overlay: File.join(tmp, 'overlay')
      }
      FileUtils.mkdir_p dirs.values
      # The merged directory is the mount point, not a mount option.
      overlay_dir = dirs.delete :overlay
      mount(device: root, dir: overlay_dir, type: :overlay, opts: dirs) do |mounted|
        block.call mounted
      end
    end
  end
end
# Miscellaneous helpers: process-tree discovery and host-file/volume setup
# inside the container's overlay root.
class Utils
  # Returns every descendant pid of +pid+ as reported by `pgrep -P`,
  # direct children first, followed by each child's own descendants.
  #
  # @param pid [Integer] defaults to the current process
  # @return [Array<Integer>]
  def self.child_pids(pid = Process.pid)
    # Integer() keeps anything but a number out of the shell command.
    children = `pgrep -P #{Integer(pid)}`.lines.map { |line| line.strip.to_i }
    children.delete(pid)
    children + children.flat_map { |child| child_pids(child) }
  end

  # Copies the host's /etc/resolv.conf into the overlay root.
  def self.copy_host_resolv_conf(overlay)
    FileUtils.cp '/etc/resolv.conf', File.join(overlay, '/etc/resolv.conf')
  end

  # Recursively bind-mounts each volume under +overlay+.
  #
  # For every volume the target directory is created with `mkdir -p`, the
  # mount command is run with `--rbind` added to its arguments, and a forced
  # unmount is registered to run at process exit.
  #
  # @param overlay [String] overlay root directory
  # @param volumes [Array<Hash>] hashes with :device, :dir and optionally
  #   :opts / :args (symbol keys); the hashes are not modified
  # @return [Array<String>] the mount points created inside the overlay
  def self.mount_host_volumes(overlay, volumes)
    volumes.map do |vol|
      overlay_vol_dir = File.join(overlay, vol[:dir])
      # Work on a copy so the caller's option hashes stay untouched.
      bind = vol.merge(
        dir: overlay_vol_dir,
        args: Array(vol[:args]) | ['--rbind']
      )
      [
        ['mkdir', '-p', overlay_vol_dir],
        # Splat into keywords: a positional hash is rejected on Ruby 3+.
        Mount.format_command(**bind)
      ].each do |command|
        system Shellwords.join(command)
      end
      # at_exit handlers accumulate, whereas `trap 'EXIT'` keeps only the
      # most recently installed handler (i.e. only the last volume).
      at_exit do
        `#{Shellwords.join(['umount', '-f', overlay_vol_dir])} >/dev/null 2>&1`
      end
      overlay_vol_dir
    end
  end

  # Unmounts each path in +mounts+, discarding command output and failures.
  #
  # @param mounts [Array<String>]
  def self.umount_host_volumes(mounts)
    mounts.each do |mount|
      `#{Shellwords.join(['umount', mount])} >/dev/null 2>&1`
    end
  end
end
# Wrappers around the cgcreate/cgdelete/cgset/cgexec command-line tools.
#
# A "cgroup" here is a hash of the form { controllers: [...], path: '...' }.
class CGroup
  # Builds the `-g controllers:path` argument pair.
  #
  # @param controllers [Array<String>]
  # @param path [String]
  # @return [Array<String>] e.g. ['-g', 'cpu,memory:/rocker/x']
  def self.group_opt(controllers:, path:)
    ['-g', "#{controllers.join(',')}:#{path}"]
  end

  # Runs cgcreate for the given controllers/path.
  #
  # @return [Boolean, nil] the result of Kernel#system
  def self.create_group(controllers:, path:)
    system Shellwords.join(
      ['cgcreate', *group_opt(controllers: controllers, path: path)]
    )
  end

  # Runs cgdelete for the given controllers/path.
  #
  # @return [Boolean, nil] the result of Kernel#system
  def self.delete_group(controllers:, path:)
    system Shellwords.join(
      ['cgdelete', *group_opt(controllers: controllers, path: path)]
    )
  end

  # Applies each `name=value` entry in +resources+ with a single cgset call.
  # Does nothing (returns nil) when +resources+ is empty.
  #
  # @param cgroup [Hash] only :path is used
  # @param resources [Array<String>]
  def self.resources(cgroup:, resources:)
    resource_opts = resources.flat_map { |resource| ['-r', resource] }
    return if resource_opts.empty?

    system Shellwords.join(['cgset', *resource_opts, cgroup[:path]])
  end

  # Runs +command+ inside +cgroup+ via `cgexec --sticky`.
  #
  # @param cgroup [Hash] { controllers:, path: }
  # @param command [Array<String>] argv to execute
  # @return [Boolean, nil] the result of Kernel#system
  def self.exec(cgroup:, command:)
    # `**cgroup` turns the hash into keywords; passing it positionally raises
    # ArgumentError on Ruby 3+.
    system Shellwords.join(
      ['cgexec', '--sticky', *group_opt(**cgroup), *command]
    )
  end

  # Creates the group, calls +block+ with the cgroup hash, and deletes the
  # group afterwards even if the block raises.
  #
  # @return [Object] the block's return value
  def self.with_group(controllers:, path:, &block)
    cgroup = { controllers: controllers, path: path }
    create_group(**cgroup)
    begin
      block.call cgroup
    ensure
      delete_group(**cgroup)
    end
  end
end
# Helpers for a per-container directory under the systemd cgroup hierarchy.
class Slice
  class << self
    # Joins +path+ segments beneath /sys/fs/cgroup/systemd.
    #
    # @return [String]
    def slice_path(*path)
      File.join('/sys/fs/cgroup/systemd', *path)
    end

    # Ensures the directory for +slice+ exists.
    #
    # @return [String] the absolute directory path
    def create(slice)
      slice_path(slice).tap { |dir| FileUtils.mkdir_p(dir) }
    end

    # Removes the directory at +slice_path+ (errors are ignored by rm_rf).
    def delete(slice_path)
      FileUtils.rm_rf(slice_path)
    end

    # Starts a background thread that, every quarter second, appends the pid
    # of each descendant of the current process to `<slice_path>/tasks`.
    # A failed write for one pid is ignored and the loop carries on.
    #
    # @return [Thread] the watcher thread (it loops until the process exits)
    def watch(slice_path)
      Thread.new do
        loop do
          Utils.child_pids.each { |pid| register_task(slice_path, pid) }
          sleep 0.25
        end
      end
    end

    private

    # Appends one pid to the slice's tasks file, swallowing any StandardError.
    def register_task(slice_path, pid)
      File.open(File.join(slice_path, 'tasks'), 'a') do |tasks|
        tasks.puts pid.to_s
      end
    rescue StandardError
      nil
    end
  end
end
# --- Entry point -----------------------------------------------------------
# Refuse to run unless the effective caller is uid 0.
unless Process.uid.zero?
class YouAintRoot < StandardError; end
raise YouAintRoot, 'Must be run with root privileges'
end
# Log to stderr; Options.parse sets the logger's level from --level.
logger = Logger.new $stderr
options = Options.parse ARGV, logger
# IMAGE must exist on disk; it is handed to Mount.overlay below.
unless File.exist? options.image
class ImageDontExist < StandardError; end
raise ImageDontExist, 'Could not find specified image'
end
# Everything below runs with the overlay mounted; Mount.overlay unmounts it
# when this block returns.
Mount.overlay options.image do |overlay|
# Shell-escaped KEY=VALUE words, space separated, for `env -i` below.
environment = options.environment.map do |k, v|
"#{Shellwords.escape k}=#{Shellwords.escape v}"
end.join(' ')
# Shell script passed to `capsh -- -c`: it mounts /proc, /tmp, /dev and
# /sys, sets the hostname, then execs the user command under `env -i` with
# only the configured environment. The trailing backslash in the heredoc is
# a Ruby line continuation, so `exec env -i ...` and the command end up on
# one line of the resulting string.
start_container = <<-END
mount -t proc proc /proc
mount -t tmpfs -o nosuid,strictatime,mode=755,size=1G tmpfs /tmp
mount -t tmpfs -o nosuid,strictatime,mode=755,size=1G tmpfs /dev
mount -t sysfs -o nosuid,noexec,nodev,ro sys /sys
hostname #{Shellwords.escape options.hostname}
exec env -i #{environment} \
#{Shellwords.join(options.command)}
END
# capsh arguments: chroot into the overlay, then optional capability
# set/drop lists (each name prefixed with `cap_`) and optional user.
# nil entries are removed by `compact`.
# NOTE(review): the capsh manual documents `--caps=`; confirm that the
# `--cap=` spelling built here is accepted.
cap_chroot = "--chroot=#{overlay}"
cap_sets = options.cap_set.map { |cap| Shellwords.escape "cap_#{cap}" }
cap_set = cap_sets.any? ? '--cap=' + cap_sets.join(',') : nil
cap_drops = options.cap_drop.map { |cap| Shellwords.escape "cap_#{cap}" }
cap_drop = cap_drops.any? ? '--drop=' + cap_drops.join(',') : nil
cap_user = options.user ? "--user=#{options.user}" : nil
capsh_opts = [cap_chroot, cap_set, cap_drop, cap_user].compact
capsh_command = ['capsh', *capsh_opts, '--', '-c', start_container]
# Each configured namespace becomes an `unshare --<name>` flag; capsh is
# the program unshare forks and runs.
unshare_opts = options.namespaces.map { |ns| "--#{ns}" }
unshare_command = ['unshare', *unshare_opts, '--fork', *capsh_command]
logger.debug options: options, command: unshare_command
# Create the cgroup for this container id; with_group deletes it afterwards.
CGroup.with_group controllers: options.cgroups,
path: "/rocker/#{options.id}" \
do |cgroup|
Utils.copy_host_resolv_conf overlay
mounts = Utils.mount_host_volumes overlay, options.volumes
# rocker_slice is not read again; the call is made for its side effect of
# creating the parent directory.
rocker_slice = Slice.create '/rocker'
container_slice = Slice.create "/rocker/#{options.id}"
begin
# Background thread that keeps adding child pids to the slice's tasks file.
Slice.watch container_slice
CGroup.resources cgroup: cgroup,
resources: options.resources
# Blocks until the container command exits (CGroup.exec uses system).
CGroup.exec cgroup: cgroup,
command: unshare_command
ensure
# Always remove the slice directory and unmount the bind-mounted volumes.
Slice.delete container_slice
Utils.umount_host_volumes mounts
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment