Skip to content

Instantly share code, notes, and snippets.

@chaserhkj
Created November 13, 2021 23:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chaserhkj/7124bfc60822358ace2d46135ca2464c to your computer and use it in GitHub Desktop.
Save chaserhkj/7124bfc60822358ace2d46135ca2464c to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import sh
import os, sys
import time
# `shx` is used for commands whose output should be shown to the user: it is
# built from the `sh` module with stdout/stderr pointed at this process's own
# streams. NOTE(review): calling the module object like this presumably relies
# on the `sh` 1.x default-arguments feature -- confirm against the installed
# version of `sh`.
shx = sh(_out=sys.stdout, _err=sys.stderr)
# Print a single message to stderr.
eprint = lambda p: print(p, file=sys.stderr)
# Masks are for configuring kernel writeback worker mask
# (they are written to /sys/bus/workqueue/devices/writeback/cpumask).
# Full core usages
TOTAL_CORES_MASK="ffff"
# Only use Host cores when VM is active
# HOST_CORES_MASK="0f0f" # Normal Config: 0-3,8-11
HOST_CORES_MASK="0707" # Isolated IO Config: 0-2,8-10
# CPU lists below are passed to `cset set -c`.
TOTAL_CORES='0-15'
# HOST_CORES="0-3,8-11" # Normal Config
HOST_CORES="0-2,8-10" # Isolated IO Config
# VIRT_CORES="4-7,12-15" # Normal Config
# NOTE(review): VIRT_CORES is not referenced anywhere in the visible code.
VIRT_CORES="3-7,11-15" # Isolated IO Config
# Cores for running IO related tasks (assigned to the "kvm.win10.io" cpuset)
IO_CORES="3,11"
def write_sys(content, path):
    """Overwrite the file at *path* with the string *content*.

    The file is opened in text write mode, so any previous content is
    replaced. Errors from opening or writing propagate to the caller.
    """
    with open(path, mode="w") as target:
        target.write(content)
def read_sys(path):
    """Return the full text of the file at *path* with surrounding whitespace removed."""
    with open(path) as source:
        raw_text = source.read()
    return raw_text.strip()
def ensure_sys(content, path, choice=False, *, retries=5, delay=1):
    """Write *content* to *path* and confirm it took effect, retrying on mismatch.

    Args:
        content: String to write.
        path: File that is written and then read back for verification.
        choice: When true, the value counts as applied once the read-back
            text contains ``"[" + content + "]"`` (a bracketed selection among
            several options). Otherwise the stripped read-back text must be
            exactly equal to *content*.
        retries: Maximum number of write/verify attempts (default 5).
        delay: Seconds to wait between attempts (default 1).

    Returns:
        True as soon as one attempt verifies, False if every attempt failed.
        Errors raised while writing or reading are not caught here.
    """
    for attempt in range(retries):
        # Only pause *between* attempts; there is nothing to wait for once the
        # final attempt has already failed.
        if attempt:
            time.sleep(delay)
        write_sys(content, path)
        current_config = read_sys(path)
        if choice:
            applied = "[" + content + "]" in current_config
        else:
            applied = current_config == content
        if applied:
            return True
    return False
def get_csets(recursive=False):
    """List cpusets by parsing the output of ``cset -m set -l``.

    Args:
        recursive: When true, ``-r`` is appended to the cset command line.

    Returns:
        A list of ``(first_field, last_field)`` tuples, one per parsed output
        line, where each line is split on ``;``. Callers in this file use the
        first field as the set name and the last field as the set path.
    """
    cset_args = ["-m", "set", "-l"]
    if recursive:
        cset_args.append("-r")
    listing = sh.cset(*cset_args).strip().split("\n")

    # The first and the last line of the listing are skipped.
    # NOTE(review): presumably these are header/trailer lines of the
    # machine-readable output -- confirm against the cset version in use.
    parsed = []
    for row in listing[1:-1]:
        fields = row.split(";")
        parsed.append((fields[0], fields[-1]))
    return parsed
def get_cset_names(recursive=False):
    """Return only the name component of every entry reported by ``get_csets``."""
    names = []
    for name, _path in get_csets(recursive):
        names.append(name)
    return names
def shield_vm():
    """Confine host workloads to HOST_CORES and (re)create the shield cpuset.

    Steps, in order:
      * ``machine.slice`` is given TOTAL_CORES and ``kvm.win10.io`` IO_CORES;
      * ``docker`` and every cpuset whose name starts with one of the LXC
        prefixes is set (recursively) to HOST_CORES;
      * a pre-existing ``kvm.win10.shield`` set is removed, escalating to a
        forced recursive delete and finally to a delete while
        anbox-container-manager is stopped; a warning is printed if it still
        exists afterwards;
      * ``kvm.win10.shield`` is set to HOST_CORES and ``cset proc -m -k`` is
        run from ``root`` to that set.

    Failures of the underlying commands propagate as ``sh`` exceptions, except
    for the first plain delete, which falls back to the forced delete.
    """
    shield = "kvm.win10.shield"
    anbox_unit = "anbox-container-manager.service"
    # The prefixes are mutually exclusive, so one tuple test is equivalent to
    # checking them one after another.
    lxc_prefixes = ("lxc.monitor.default", "lxc.payload.default", "lxc.pivot")

    shx.cset("set", "-c", TOTAL_CORES, "-s", "machine.slice")
    shx.cset("set", "-c", IO_CORES, "-s", "kvm.win10.io")
    shx.cset("set", "-c", HOST_CORES, "-r", "-s", "docker")
    for set_name, set_path in get_csets():
        if set_name.startswith(lxc_prefixes):
            shx.cset("set", "-c", HOST_CORES, "-r", "-s", set_path)

    # Make sure kvm.win10.shield is freshly created
    if shield in get_cset_names():
        try:
            shx.cset("set", "-d", "-s", shield)
        except sh.ErrorReturnCode:
            shx.cset("set", "-d", "-r", "--force", "-s", shield)
        # anbox-container-manager could intervene, so try to resolve that
        if shield in get_cset_names():
            shx.systemctl("stop", anbox_unit)
            shx.cset("set", "-d", "-r", "--force", "-s", shield)
            shx.systemctl("start", anbox_unit)
        if shield in get_cset_names():
            eprint("WARN: Unable to cleanup shield cpuset, maybe due to a nested cgroup created by another service")

    shx.cset("set", "-c", HOST_CORES, "-s", shield)
    shx.cset("proc", "-m", "-k", "-f", "root", "-t", shield)
def unshield_vm():
    """Undo ``shield_vm``: give host workloads all cores back and drop the shield.

    Steps, in order:
      * the ``machine.slice`` and ``kvm.win10.io`` cpusets are deleted;
      * ``docker`` and every cpuset whose name starts with one of the LXC
        prefixes is set back to TOTAL_CORES (``-r -p``);
      * ``cset proc -m -k`` is run from ``kvm.win10.shield`` back to ``root``;
      * ``kvm.win10.shield`` is deleted, escalating to a forced recursive
        delete and finally to a delete while anbox-container-manager is
        stopped; a warning is printed if it still exists afterwards.

    Failures of the underlying commands propagate as ``sh`` exceptions, except
    for the first plain delete of the shield, which falls back to the forced
    delete.
    """
    shield = "kvm.win10.shield"
    anbox_unit = "anbox-container-manager.service"
    # "lxc.pivot" used to be reset with HOST_CORES and without "-p" (a copy of
    # the shield_vm line), which left those sets restricted after unshielding.
    # It is now restored exactly like the other LXC sets.
    lxc_prefixes = ("lxc.monitor.default", "lxc.payload.default", "lxc.pivot")

    shx.cset("set", "-d", "-s", "machine.slice")
    shx.cset("set", "-d", "-s", "kvm.win10.io")
    shx.cset("set", "-c", TOTAL_CORES, "-r", "-p", "-s", "docker")
    for set_name, set_path in get_csets():
        if set_name.startswith(lxc_prefixes):
            shx.cset("set", "-c", TOTAL_CORES, "-r", "-p", "-s", set_path)

    shx.cset("proc", "-m", "-k", "-f", shield, "-t", "root")

    # Make sure kvm.win10.shield is deleted
    try:
        shx.cset("set", "-d", "-s", shield)
    except sh.ErrorReturnCode:
        shx.cset("set", "-d", "-r", "--force", "-s", shield)
    # anbox-container-manager could intervene, so try to resolve that
    if shield in get_cset_names():
        shx.systemctl("stop", anbox_unit)
        shx.cset("set", "-d", "-r", "--force", "-s", shield)
        shx.systemctl("start", anbox_unit)
    if shield in get_cset_names():
        eprint("WARN: Unable to cleanup shield cpuset, maybe due to a nested cgroup created by another service")
def hp_allocated():
    """Report whether one of the expected huge-page pools is already configured.

    Returns True when the 2048kB pool reports exactly "8192" pages, or --
    checked only if the first test fails -- the 1048576kB pool reports
    exactly "16" pages.
    """
    if read_sys("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages") == "8192":
        return True
    return read_sys("/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages") == "16"
def allocate_hp(use_2MB=False):
    """Request the huge-page pool of the chosen page size.

    Args:
        use_2MB: When true, ask for "8192" pages in the 2048kB pool; otherwise
            ask for "16" pages in the 1048576kB pool.

    Returns:
        True if the requested count was confirmed by ``ensure_sys``; False if
        it was not but both pools could be reset to zero afterwards.

    Raises:
        Exception: the request failed and resetting the pools failed as well.
    """
    page_count, config_path = (
        ("8192", "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages")
        if use_2MB
        else ("16", "/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages")
    )
    if ensure_sys(page_count, config_path):
        return True
    # The request did not stick: release whatever was allocated before
    # reporting the failure.
    if not deallocate_hp():
        raise Exception("Recovery from failed hugepage allocation panicked")
    return False
def deallocate_hp():
    """Set both huge-page pools to "0"; return True only if both writes verify.

    The 1048576kB pool is handled first; the 2048kB pool is only touched when
    the first reset succeeded.
    """
    pool_paths = (
        "/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages",
        "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages",
    )
    for pool_path in pool_paths:
        if not ensure_sys("0", pool_path):
            return False
    return True
def allocate_best_hp():
    """Allocate huge pages, preferring the 1GB pool and falling back to 2MB.

    Nothing is done when ``hp_allocated`` already reports a configured pool.
    Otherwise the external ``flushcache`` command is run, the function waits
    three seconds, both pools are reset, and the allocation is attempted.

    Returns:
        True on success (or when already allocated), False when the reset or
        both allocation attempts fail.
    """
    if hp_allocated():
        eprint("Huge pages already allocated, skipping")
        return True

    eprint("Flushing caches")
    shx.flushcache()
    eprint("Short sleep after flush to wait everything out")
    time.sleep(3)

    if not deallocate_hp():
        return False
    if allocate_hp(False):
        return True
    eprint("1GB huge page allocation failed, trying 2MB")
    return allocate_hp(True)
def mount_hugetlbfs():
    """Mount hugetlbfs on /dev/hugepages with the page size of the active pool.

    The page size is "2M" when the 2048kB pool reports "8192" pages, else
    "1024M" when the 1048576kB pool reports "16" pages.

    If ``mount`` already lists /dev/hugepages with exactly the options
    ``(rw,relatime,pagesize=<size>)`` the function returns immediately,
    without creating the libvirt directory. Otherwise an existing hugetlbfs
    mount on /dev/hugepages is unmounted, the filesystem is mounted with the
    wanted page size, and /dev/hugepages/libvirt/qemu is created.

    Raises:
        Exception: neither pool holds the expected number of pages.
    """
    if read_sys("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages") == "8192":
        page_size = "2M"
    elif read_sys("/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages") == "16":
        page_size = "1024M"
    else:
        raise Exception("No huge pages allocated")

    any_hugetlbfs = "hugetlbfs on /dev/hugepages type hugetlbfs"
    wanted_hugetlbfs = any_hugetlbfs + " (rw,relatime,pagesize=" + page_size + ")"

    if wanted_hugetlbfs in sh.mount():
        return
    # Mounted with other options (or not at all): remount with the right size.
    if any_hugetlbfs in sh.mount():
        shx.umount("/dev/hugepages")
    shx.mount("-t", "hugetlbfs", "-o", "pagesize=" + page_size, "hugetlbfs", "/dev/hugepages")
    shx.mkdir("-p", "/dev/hugepages/libvirt/qemu")
def prepare():
    """Put the host into its "VM running" configuration.

    In order: stops the user's syncthing unit, removes the vbox kernel
    modules (exit status 1 tolerated), unmounts /data if /dev/sdd1 is mounted
    there, swaps smb.service for smb-vm.service, stops smartd, allocates huge
    pages, mounts hugetlbfs, shields the VM cpusets, launches scream and
    synergyc inside the "kvm.win10.io" cpuset as user hkj, then applies the
    sysctl, writeback, THP and cpufreq settings.

    Returns:
        0 on success; 1 if huge-page allocation or any of the verified sysfs
        writes fails (steps already performed are not rolled back here).
    """
    user_bus = "DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus"
    # Common prefix for starting a process inside the IO cpuset as user hkj.
    run_in_io_set = ("proc", "-e", "-s", "kvm.win10.io", "-u", "hkj", "--")
    # scream is only exec'ed once the bridge carries its address.
    scream_script = "while ! ip addr show dev kvmbr0 | grep 192.168.111.1 >/dev/null; do sleep 1; done; exec /usr/bin/scream -u -o pulse -i 192.168.111.1"

    eprint("Stoping syncthing")
    shx.sudo("-u", "hkj", "-E", user_bus, "systemctl", "--user", "stop", "syncthing")

    eprint("Removing vbox modules")
    shx.rmmod("vboxnetflt", "vboxpci", "vboxnetadp", "vboxdrv", _ok_code=[0, 1])

    if "/dev/sdd1 on /data" in sh.mount():
        eprint("Umounting /data")
        shx.umount("/data")

    eprint("Changing SMB Service")
    shx.systemctl("stop", "smb.service")
    shx.systemctl("start", "smb-vm.service")

    eprint("Stoping smartd")
    shx.systemctl("stop", "smartd")

    eprint("Allocating huge pages")
    if not allocate_best_hp():
        return 1

    eprint("Ensuring hugetlbfs is mounted correctly")
    mount_hugetlbfs()

    eprint("Shielding VM")
    shield_vm()

    eprint("Starting scream")
    shx.cset(
        *run_in_io_set,
        "daemonize",
        "-E", "PULSE_SERVER=/run/user/1000/pulse/native",
        "-E", "PULSE_COOKIE=/home/hkj/.config/pulse/cookie",
        "-e", "/tmp/scream.err",
        "-o", "/tmp/scream.out",
        "/usr/bin/bash", "-c", scream_script
    )

    # eprint("Starting synergys")
    # shx.cset("proc", "-e", "-s", "kvm.win10.io", "-u", "hkj", "--",
    # "synergys", "-c", "/home/hkj/.config/Synergy/Synergys.conf")
    eprint("Starting synergyc")
    shx.cset(*run_in_io_set, "synergyc", "-n", "hkj-desktop", "192.168.111.2")

    eprint("Misc kernel setup")
    shx.sysctl("-w", "vm.stat_interval=120")
    shx.sysctl("-w", "kernel.watchdog=0")

    eprint("Setting writeback cpumask")
    if not ensure_sys(HOST_CORES_MASK, "/sys/bus/workqueue/devices/writeback/cpumask"):
        return 1

    eprint("Disable THP")
    if not ensure_sys("never", "/sys/kernel/mm/transparent_hugepage/enabled", True):
        return 1

    eprint("Force P-States to performance")
    shx.cpupower("frequency-set", "-g", "performance")

    eprint("Disable NUMA writeback")
    if not ensure_sys("0", "/sys/bus/workqueue/devices/writeback/numa"):
        return 1

    return 0
def release():
    """Return the host to its normal configuration after the VM has stopped.

    First makes sure /data is mounted, retrying once per second while the
    mount commands fail. Then reverses what ``prepare`` set up: sysctl
    values, synergyc/scream processes, cpuset shielding, writeback cpumask,
    THP, cpufreq governor, NUMA writeback, the SMB/smartd services and the
    user's syncthing unit. The sysfs writes are best effort -- their results
    are not checked.

    Returns:
        1 if /data could not be mounted after the retries, otherwise 0.
    """
    user_bus = "DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus"

    failures = 0
    while True:
        try:
            if "/dev/sdd1 on /data" not in sh.mount():
                eprint("Mounting /data")
                shx.mount("/data")
        except sh.ErrorReturnCode:
            # Give up once five retries have already been spent.
            if failures >= 5:
                return 1
            time.sleep(1)
            failures += 1
        else:
            break

    eprint("Reverse kernel setup")
    shx.sysctl("-w", "vm.stat_interval=1")
    shx.sysctl("-w", "kernel.watchdog=1")

    # eprint("Stopping synergys")
    # shx.sudo("-u", "hkj", "killall", "synergys", _ok_code=[0, 1])
    # killall exiting with status 1 is tolerated for both processes below.
    eprint("Stopping synergyc")
    shx.sudo("-u", "hkj", "killall", "synergyc", _ok_code=[0, 1])
    eprint("Stopping scream")
    shx.sudo("-u", "hkj", "killall", "scream", _ok_code=[0, 1])

    eprint("Un-Shielding VM")
    unshield_vm()

    eprint("Resetting writeback cpumask")
    ensure_sys(TOTAL_CORES_MASK, "/sys/bus/workqueue/devices/writeback/cpumask")

    eprint("Enable THP")
    ensure_sys("always", "/sys/kernel/mm/transparent_hugepage/enabled", True)

    eprint("Force P-States to powersave")
    shx.cpupower("frequency-set", "-g", "powersave")

    eprint("Enable NUMA writeback")
    ensure_sys("1", "/sys/bus/workqueue/devices/writeback/numa")

    eprint("Changing SMB Service")
    shx.systemctl("stop", "smb-vm.service")
    shx.systemctl("start", "smb.service")

    eprint("Starting smartd")
    shx.systemctl("start", "smartd")

    eprint("Starting syncthing")
    shx.sudo("-u", "hkj", "-E", user_bus, "systemctl", "--user", "start", "syncthing")
    return 0
def mstate():
    """Print total, free and used page counts (and MB) for both huge-page pools.

    All four sysfs counters are read before anything is printed. "Used" is
    computed as total minus free.
    """
    def pool_counter(pool, counter):
        # Read one integer counter of one pool from sysfs.
        return int(read_sys("/sys/kernel/mm/hugepages/{}/{}".format(pool, counter)))

    total_2mb = pool_counter("hugepages-2048kB", "nr_hugepages")
    free_2mb = pool_counter("hugepages-2048kB", "free_hugepages")
    total_1gb = pool_counter("hugepages-1048576kB", "nr_hugepages")
    free_1gb = pool_counter("hugepages-1048576kB", "free_hugepages")
    used_2mb = total_2mb - free_2mb
    used_1gb = total_1gb - free_1gb

    print("2MB Huge Pages: {} pages, {} MB".format(total_2mb, total_2mb * 2))
    print(" Free : {} pages, {} MB".format(free_2mb, free_2mb * 2))
    print(" Used : {} pages, {} MB".format(used_2mb, used_2mb * 2))
    print("1GB Huge Pages: {} pages, {} MB".format(total_1gb, total_1gb * 1024))
    print(" Free : {} pages, {} MB".format(free_1gb, free_1gb * 1024))
    print(" Used : {} pages, {} MB".format(used_1gb, used_1gb * 1024))
def main():
    """Dispatch on the operation given as the second command-line argument.

    The first argument is accepted but not inspected. Recognised operations
    are ``shield``, ``unshield``, ``malloc``, ``free``, ``mstate``,
    ``prepare`` and ``release``; each ends the process with an exit status
    reflecting success (0) or failure (1). Any other operation is ignored and
    the function simply returns.

    When fewer than two arguments are supplied, a usage message is written to
    stderr and the process exits with status 2 instead of crashing with an
    IndexError.
    """
    if len(sys.argv) < 3:
        print(
            "usage: {} NAME {{shield|unshield|malloc|free|mstate|prepare|release}}".format(
                sys.argv[0] if sys.argv else "script"
            ),
            file=sys.stderr,
        )
        sys.exit(2)

    operation = sys.argv[2]
    if operation == "shield":
        shield_vm()
        sys.exit(0)
    if operation == "unshield":
        unshield_vm()
        sys.exit(0)
    if operation == "malloc":
        sys.exit(0 if allocate_best_hp() else 1)
    if operation == "free":
        sys.exit(0 if deallocate_hp() else 1)
    if operation == "mstate":
        mstate()
        sys.exit(0)
    if operation == "prepare":
        sys.exit(prepare())
    if operation == "release":
        sys.exit(release())
    # Unrecognised operations fall through on purpose: nothing to do.


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment