Skip to content

Instantly share code, notes, and snippets.

@amodm
Created July 3, 2024 15:18
Show Gist options
  • Save amodm/a61e6d0c413e8cc9ac4c56a803150daf to your computer and use it in GitHub Desktop.
Save amodm/a61e6d0c413e8cc9ac4c56a803150daf to your computer and use it in GitHub Desktop.
/* See the corresponding blog post for details:
* https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos
*/
#pragma once
#include <net/if_var.h>
/*
 * Bridge driver request structures passed through SIOC[SG]DRVSPEC.
 * They are laid out with 4-byte packing. The packing is pushed here and
 * popped after the last structure so it does not leak into headers that
 * are included after this one.
 */
#pragma pack(push, 4)

/* Per-member request: identifies one member interface of a bridge. */
struct ifbreq {
    char     ifbr_ifsname[IFNAMSIZ]; /* member if name */
    uint32_t ifbr_ifsflags;          /* member if flags */
    uint32_t ifbr_stpflags;          /* member if STP flags */
    uint32_t ifbr_path_cost;         /* member if STP cost */
    uint8_t  ifbr_portno;            /* member if port number */
    uint8_t  ifbr_priority;          /* member if STP priority */
    uint8_t  ifbr_proto;             /* member if STP protocol */
    uint8_t  ifbr_role;              /* member if STP role */
    uint8_t  ifbr_state;             /* member if STP state */
    uint32_t ifbr_addrcnt;           /* member if addr number */
    uint32_t ifbr_addrmax;           /* member if addr max */
    uint32_t ifbr_addrexceeded;      /* member if addr violations */
    uint8_t  pad[32];
};

/* Member-list request: a length plus a caller-supplied buffer of ifbreq. */
struct ifbifconf {
    uint32_t ifbic_len; /* buffer size */
    union {
        caddr_t        ifbicu_buf;
        struct ifbreq *ifbicu_req;
#define ifbic_buf ifbic_ifbicu.ifbicu_buf
#define ifbic_req ifbic_ifbicu.ifbicu_req
    } ifbic_ifbicu;
};

#pragma pack(pop)
/* See the corresponding blog post for details:
* https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos
*/
#pragma once
#include <net/if_var.h>
/* -----------------------------------------------------
* Fake ethernet related headers.
* https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_fake_var.h.auto.html
* -----------------------------------------------------
*/
/* Command codes carried in ifdrv.ifd_cmd for SIOCSDRVSPEC ("set") requests. */
enum {
    IF_FAKE_S_CMD_NONE              = 0,
    IF_FAKE_S_CMD_SET_PEER          = 1,
    IF_FAKE_S_CMD_SET_MEDIA         = 2,
    IF_FAKE_S_CMD_SET_DEQUEUE_STALL = 3,
};

/* Command codes carried in ifdrv.ifd_cmd for SIOCGDRVSPEC ("get") requests. */
enum {
    IF_FAKE_G_CMD_NONE     = 0,
    IF_FAKE_G_CMD_GET_PEER = 1,
};
/* Number of entries in if_fake_media.iffm_list. */
#define IF_FAKE_MEDIA_LIST_MAX 27

/* Media description: the current media word plus a list of supported ones. */
struct if_fake_media {
    int32_t  iffm_current;
    uint32_t iffm_count;
    uint32_t iffm_reserved[3];
    int32_t  iffm_list[IF_FAKE_MEDIA_LIST_MAX];
};

/*
 * Payload referenced by ifdrv.ifd_data for fake-ethernet driver requests.
 * The union is pinned to 128 bytes by iffru_buf; which member is meaningful
 * depends on the command code.
 */
struct if_fake_request {
    uint64_t iffr_reserved[4];
    union {
        char                 iffru_buf[128];            /* stable size */
        struct if_fake_media iffru_media;
        char                 iffru_peer_name[IFNAMSIZ]; /* if name, e.g. "en0" */
        /*
         * control dequeue stall. 0: disable dequeue stall, else
         * enable dequeue stall.
         */
        uint32_t             iffru_dequeue_stall;
    } iffr_u;
};

/* Shorthand accessors for the union members. */
#define iffr_peer_name     iffr_u.iffru_peer_name
#define iffr_media         iffr_u.iffru_media
#define iffr_dequeue_stall iffr_u.iffru_dequeue_stall
// See the corresponding blog post for details:
// https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos
import Foundation
// xnu is a custom module that I created to expose the relevant C structs
// that the kernel expects, as those structs are not part of the userspace
// API. This module contains C-bridge headers if-fake.h and if-bridge.h
// which are also shown in this gist.
import xnu
struct NetworkInterface {
let name: String
let mac: ether_addr_t
let ips: [String]
let type: UInt32
let flags: UInt32
/// `true` when the interface type equals `IFT_BRIDGE`.
var isBridge: Bool {
    type == UInt32(IFT_BRIDGE)
}
/// `true` when the `IFF_LOOPBACK` flag is set.
var isLoopback: Bool {
    (flags & UInt32(IFF_LOOPBACK)) != 0
}
/// `true` when the interface name begins with "feth".
var isFakeEth: Bool {
    name.starts(with: "feth") // TODO: figure out type?
}
/// `true` when the `IFF_UP` flag is set.
var up: Bool {
    (flags & UInt32(IFF_UP)) != 0
}
/// Brings this interface up or down by delegating to the static
/// `changeStatus(name:up:)` with this interface's name.
func changeStatus(up: Bool) throws {
    try NetworkInterface.changeStatus(name: name, up: up)
}
/// - Returns: all network interfaces currently configured on this system.
///
/// One `NetworkInterface` is appended per address entry returned by
/// `getifaddrs()` whose family is `AF_LINK`, `AF_INET` or `AF_INET6`; entries
/// of any other family, or without an address, are skipped. A link-layer entry
/// carries the MAC (and no IPs); an IP entry carries one textual address.
///
/// Traps (`fatalError` / `try!`) if `getifaddrs()`, the control socket or the
/// interface-type ioctl fails, because this accessor cannot throw.
static var all: [NetworkInterface] {
    var ifap: UnsafeMutablePointer<ifaddrs>? = nil
    guard getifaddrs(&ifap) == 0 else {
        fatalError("getifaddrs() failed: \(String(cString: strerror(errno)))")
    }
    defer { freeifaddrs(ifap) }
    var interfaces = [NetworkInterface]()
    try! withControlSocket { ctl in
        // Walk the linked list with an explicit cursor; it is advanced first so
        // that `continue` can never skip the step to the next node.
        var cursor = ifap
        while let node = cursor {
            cursor = node.pointee.ifa_next
            let ifa = node.pointee
            // ifa_addr is allowed to be NULL for an entry; skip those.
            guard let sa = ifa.ifa_addr else { continue }
            let ifname = String(cString: ifa.ifa_name)
            let flags = ifa.ifa_flags
            var ips = [String]()
            var mac = ether_addr_t()
            switch Int32(sa.pointee.sa_family) {
            case AF_LINK:
                // Only the fixed header is copied by value. The name and address
                // bytes may extend past the declared 12-byte sdl_data array, so the
                // MAC is read from the original storage, bounded by sdl_len.
                let header = sa.withMemoryRebound(to: sockaddr_dl.self, capacity: 1) { $0.pointee }
                // The fixed header fields before sdl_data occupy 8 bytes.
                let dataOffset = MemoryLayout<sockaddr_dl>.offset(of: \sockaddr_dl.sdl_data) ?? 8
                let macOffset = dataOffset + Int(header.sdl_nlen)
                let macLen = MemoryLayout<ether_addr_t>.size
                // Interfaces without a 6-byte link address keep an all-zero MAC.
                if Int(header.sdl_alen) == macLen, macOffset + macLen <= Int(header.sdl_len) {
                    memcpy(&mac, UnsafeRawPointer(sa).advanced(by: macOffset), macLen)
                }
            case AF_INET:
                var addr = sa.withMemoryRebound(to: sockaddr_in.self, capacity: 1) { $0.pointee }
                var ip = [CChar](repeating: 0, count: Int(INET_ADDRSTRLEN))
                inet_ntop(AF_INET, &addr.sin_addr, &ip, socklen_t(INET_ADDRSTRLEN))
                ips.append(String(cString: ip))
            case AF_INET6:
                var addr = sa.withMemoryRebound(to: sockaddr_in6.self, capacity: 1) { $0.pointee }
                var ip = [CChar](repeating: 0, count: Int(INET6_ADDRSTRLEN))
                inet_ntop(AF_INET6, &addr.sin6_addr, &ip, socklen_t(INET6_ADDRSTRLEN))
                ips.append(String(cString: ip))
            default:
                continue
            }
            // Ask the kernel for the interface type of this interface name.
            var ifr = ifreq()
            memset(&ifr, 0, MemoryLayout<ifreq>.size)
            ifname.copyTo(&ifr.ifr_name)
            guard ioctl(ctl, IfIoctl.SIOCFIFTYPE, &ifr) == 0 else {
                fatalError("\(ifname):ioctl(SIOCFIFTYPE): \(String(cString: strerror(errno)))")
            }
            let type = ifr.ifr_ifru.ifru_functional_type
            interfaces.append(NetworkInterface(name: ifname, mac: mac, ips: ips, type: type, flags: flags))
        }
    }
    return interfaces
}
/// Runs `body` with a short-lived datagram socket suitable for interface ioctls.
/// - Parameters:
///   - family: address family of the socket to open (defaults to `AF_LOCAL`).
///   - body: closure that receives the socket descriptor.
/// - Returns: whatever `body` returns.
/// - Throws: `RVMError.sycallError` if the socket cannot be created, or
///   whatever `body` throws. The socket is closed on every exit path.
private static func withControlSocket<T>(_ family: Int32 = AF_LOCAL, _ body: (Int32) throws -> T) throws -> T {
    // Honour the requested family; it used to be ignored in favour of AF_LOCAL.
    let sock = socket(family, SOCK_DGRAM, 0)
    guard sock >= 0 else {
        throw RVMError.sycallError("control:socket()")
    }
    defer { close(sock) }
    return try body(sock)
}
/// Creates a fake eth interface, and peers with `peer` (if provided).
///
/// The first name `feth0` ... `feth127` that is not currently present is used.
/// If peering fails, the interface that was just created is destroyed again
/// (best effort) before the error is thrown.
/// - Parameter peer: the peer to connect to
/// - Returns: the name of the fake eth interface that was created.
/// - Throws: `RVMError.illegalState` when all 128 names are taken, or
///   `RVMError.sycallError` when creating or peering fails.
static func createFakeEth(peer: String? = nil) throws -> String {
    let allFakeEths = Set(all.filter { $0.isFakeEth }.map { $0.name })
    guard let name = (0..<128).map({ "feth\($0)" }).first(where: { !allFakeEths.contains($0) }) else {
        throw RVMError.illegalState("feth:create(): out of options")
    }
    var ifr = ifreq()
    memset(&ifr, 0, MemoryLayout.size(ofValue: ifr))
    name.copyTo(&ifr.ifr_name)
    ifr.ifr_ifru.ifru_flags = Int16(IFF_UP | IFF_RUNNING)
    try withControlSocket { ctl in
        // create
        guard ioctl(ctl, IfIoctl.SIOCIFCREATE2, &ifr) == 0 else {
            throw RVMError.sycallError("feth:create()")
        }
        if let peer = peer {
            // from https://opensource.apple.com/source/network_cmds/network_cmds-606.40.2/ifconfig.tproj/iffake.c.auto.html
            var iffr = if_fake_request()
            memset(&iffr, 0, MemoryLayout.size(ofValue: iffr))
            peer.copyTo(&iffr.iffr_u.iffru_peer_name)
            var ifd = ifdrv()
            memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
            name.copyTo(&ifd.ifd_name)
            ifd.ifd_cmd = UInt(IF_FAKE_S_CMD_SET_PEER)
            ifd.ifd_len = MemoryLayout.size(ofValue: iffr)
            // The pointer to `iffr` is only valid inside this closure, so the
            // ioctl that dereferences it has to run inside the closure as well.
            let rc = withUnsafeMutablePointer(to: &iffr) { iffrPtr -> Int32 in
                ifd.ifd_data = UnsafeMutableRawPointer(iffrPtr)
                return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
            }
            if rc != 0 {
                // Do not leave a half-configured interface behind; keep the
                // original errno visible to whoever inspects it after the throw.
                let savedErrno = errno
                _ = ioctl(ctl, IfIoctl.SIOCIFDESTROY, &ifr)
                errno = savedErrno
                throw RVMError.sycallError("feth:ioctl(set-peer)")
            }
        }
    }
    return name
}
/// Destroys the network interface called `name` via `SIOCIFDESTROY`.
/// - Parameter name: the name of the network interface to delete.
/// - Throws: `RVMError.sycallError` if the control socket or the ioctl fails.
static func deleteInterface(_ name: String) throws {
    var request = ifreq()
    memset(&request, 0, MemoryLayout<ifreq>.size)
    name.copyTo(&request.ifr_name)
    try withControlSocket { ctl in
        if ioctl(ctl, IfIoctl.SIOCIFDESTROY, &request) != 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCIFDESTROY)")
        }
    }
}
/// Creates a pair of fake eth interfaces, and peers them together.
///
/// Both interfaces are brought up before returning. If any step after the
/// first creation fails, the interfaces created so far are deleted again
/// (best effort) and the original error is rethrown.
/// - Returns: the names of the two fake eth interfaces that were created.
static func createFakeEthPair() throws -> (String, String) {
    let feth1 = try createFakeEth()
    do {
        let feth2 = try createFakeEth(peer: feth1)
        do {
            try changeStatus(name: feth1, up: true)
            try changeStatus(name: feth2, up: true)
        } catch {
            try? deleteInterface(feth2)
            throw error
        }
        return (feth1, feth2)
    } catch {
        try? deleteInterface(feth1)
        throw error
    }
}
/// Sets or clears `IFF_UP | IFF_RUNNING` on the named interface.
///
/// The current flags are read first; the set-flags ioctl is only issued when
/// the resulting flag word actually differs.
/// - Parameters:
///   - name: the name of the network interface
///   - up: whether to bring the interface up or down
/// - Throws: an error if the operation fails
static func changeStatus(name: String, up: Bool) throws {
    var request = ifreq()
    memset(&request, 0, MemoryLayout<ifreq>.size)
    name.copyTo(&request.ifr_name)
    let statusBits = UInt16(IFF_UP | IFF_RUNNING)
    try NetworkInterface.withControlSocket(AF_INET) { ctl in
        if ioctl(ctl, IfIoctl.SIOCGIFFLAGS, &request) != 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCGIFFLAGS)")
        }
        // Work on the raw 16-bit pattern so the sign bit is just another flag.
        let current = UInt16(bitPattern: request.ifr_ifru.ifru_flags)
        let desired = up ? (current | statusBits) : (current & ~statusBits)
        if desired != current {
            request.ifr_ifru.ifru_flags = Int16(bitPattern: desired)
            if ioctl(ctl, IfIoctl.SIOCSIFFLAGS, &request) < 0 {
                throw RVMError.sycallError("\(name):ioctl(SIOCSIFFLAGS)")
            }
        }
    }
}
/// Adds `ifc` to the network bridge `bridge`.
/// - Parameters:
///   - ifc: the network interface to add to the bridge.
///   - bridge: the network bridge.
/// - Throws: `RVMError.sycallError` if the control socket or the ioctl fails.
static func addInterfaceToBridge(_ ifc: String, to bridge: String) throws {
    var req = ifbreq()
    memset(&req, 0, MemoryLayout.size(ofValue: req))
    ifc.copyTo(&req.ifbr_ifsname)
    var ifd = ifdrv()
    memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
    bridge.copyTo(&ifd.ifd_name)
    ifd.ifd_cmd = 0 // BRDGADD: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
    ifd.ifd_len = MemoryLayout.size(ofValue: req)
    try withControlSocket { ctl in
        // The pointer to `req` must not outlive the closure that vends it, so
        // the ioctl is issued while the pointer is still guaranteed valid.
        let rc = withUnsafeMutablePointer(to: &req) { reqPtr -> Int32 in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
        }
        guard rc == 0 else {
            throw RVMError.sycallError("bridge(\(bridge)):add-if(\(ifc))")
        }
    }
}
/// Ensures that `member` is a member of the `bridge` network interface.
///
/// The bridge is queried for the member's flags; if that query fails with
/// `ENOENT` the member is added to the bridge.
/// - Returns: `true` if the member was added, `false` if it was already a member.
/// - Throws: `RVMError.sycallError` if the query fails for any other reason,
///   or whatever `addInterfaceToBridge(_:to:)` throws.
static func ensureBridgeMembership(bridge: String, member: String) throws -> Bool {
    var req = ifbreq()
    memset(&req, 0, MemoryLayout.size(ofValue: req))
    member.copyTo(&req.ifbr_ifsname)
    var ifd = ifdrv()
    memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
    bridge.copyTo(&ifd.ifd_name)
    ifd.ifd_cmd = 2 // BRDGGIFFLGS: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
    ifd.ifd_len = MemoryLayout.size(ofValue: req)
    return try withControlSocket { ctl -> Bool in
        // Issue the ioctl while the pointer to `req` is still valid, and capture
        // errno immediately so later calls cannot overwrite it.
        let (rc, err) = withUnsafeMutablePointer(to: &req) { reqPtr -> (Int32, Int32) in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            let rc = ioctl(ctl, IfIoctl.SIOCGDRVSPEC, &ifd)
            return (rc, errno)
        }
        if rc >= 0 {
            return false
        }
        if err != ENOENT {
            throw RVMError.sycallError("bridge(\(bridge)):getifflags(\(member))")
        }
        try addInterfaceToBridge(member, to: bridge)
        return true
    }
}
}
/// Assembles an ioctl request number from its parts.
/// - Parameters:
///   - dir: direction bits (`IOC_VOID`, `IOC_IN`, `IOC_OUT` or `IOC_INOUT`).
///   - g: group character; a non-ASCII character contributes 0.
///   - n: command number within the group.
///   - l: parameter length in bytes, masked with `IOCPARM_MASK`.
func _IOC(_ dir: UInt32, _ g: Character, _ n: UInt, _ l: Int) -> UInt {
    let direction = UInt(dir)
    let length = (UInt(l) & UInt(IOCPARM_MASK)) << 16
    let group = UInt(g.asciiValue ?? 0) << 8
    return direction | length | group | n
}
/// Request number for a command that carries no parameter.
func _IO(_ g: Character, _ n: UInt) -> UInt {
    _IOC(IOC_VOID, g, n, 0)
}
/// Request number for a command that copies a `T` in.
func _IOW<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    _IOC(IOC_IN, char, nr, MemoryLayout<T>.size)
}
/// Request number for a command that copies a `T` out.
func _IOR<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    _IOC(IOC_OUT, char, nr, MemoryLayout<T>.size)
}
/// Request number for a command that copies a `T` in and back out.
func _IOWR<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    _IOC(IOC_INOUT, char, nr, MemoryLayout<T>.size)
}
/// Interface ioctl request numbers (group "i"), rebuilt here with the
/// `_IOW` / `_IOWR` helpers from this file.
enum IfIoctl {
    // Interface flags
    static let SIOCSIFFLAGS: UInt = _IOW("i", 16, ifreq.self)
    static let SIOCGIFFLAGS: UInt = _IOWR("i", 17, ifreq.self)

    // Media
    static let SIOCGIFMEDIA: UInt = _IOWR("i", 56, ifmediareq.self)

    // Cloned interface lifecycle
    static let SIOCIFCREATE: UInt = _IOWR("i", 120, ifreq.self)
    static let SIOCIFDESTROY: UInt = _IOW("i", 121, ifreq.self)
    static let SIOCIFCREATE2: UInt = _IOWR("i", 122, ifreq.self)

    // Driver-specific set/get; both use command number 123 and differ in direction
    static let SIOCSDRVSPEC: UInt = _IOW("i", 123, ifdrv.self)
    static let SIOCGDRVSPEC: UInt = _IOWR("i", 123, ifdrv.self)

    // Interface type query.
    // NOTE(review): number 159 looks like XNU's SIOCGIFTYPE — confirm against bsd/sys/sockio.h.
    static let SIOCFIFTYPE: UInt = _IOWR("i", 159, ifreq.self)
}
// See the corresponding blog post for details:
// https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos
import Darwin
import Foundation
import Virtualization
// we poll via kqueues in this thread
/// Thread that shuttles ethernet frames between VM network attachments and
/// host interfaces. Ports are registered with `newBridgePort` before the thread
/// is started; `main()` then polls all port sockets until the thread is cancelled.
final class NetworkSwitch: Thread {
    static var shared = NetworkSwitch()
    static var logger: VMLogFacility = {
        VMFileLogger.shared.newFacility("nwswitch")
    }()
    /// Ports registered so far; read by `main()` once the thread runs.
    private var sockDevs: [VSockDev] = []

    /// Registers a new port attached to `hostBridge` and returns the attachment
    /// to hand to the virtual machine's network device.
    /// - Parameters:
    ///   - hostBridge: name of the host interface (bridge or plain) to attach to.
    ///   - vMac: MAC address of the VM-side network device.
    /// - Throws: `RVMError.illegalState` if the switch thread is already
    ///   executing, or whatever `VSockDev.init` throws.
    func newBridgePort(hostBridge: String, vMac: ether_addr_t) throws -> VZFileHandleNetworkDeviceAttachment {
        if isExecuting {
            throw RVMError.illegalState("cannot add port after switch has started")
        }
        let vsockDev = try VSockDev(hostBridge: hostBridge, vMac: vMac)
        sockDevs.append(vsockDev)
        return VZFileHandleNetworkDeviceAttachment(fileHandle: FileHandle(fileDescriptor: vsockDev.remoteSocket))
    }

    /// Checks every bridge port and ensures that the bridge contains our interface.
    /// Failures are logged and do not stop the remaining ports from being checked.
    func ensureBridgeMembership() {
        for dev in sockDevs where dev.isBridge {
            do {
                if try NetworkInterface.ensureBridgeMembership(bridge: dev.hostInterface, member: dev.fethBridgeSide) {
                    NetworkSwitch.logger.info("readded \(dev.fethBridgeSide) to bridge \(dev.hostInterface)")
                }
            } catch {
                NetworkSwitch.logger.error("\(error)")
            }
        }
    }

    /// Allocates and default-initializes `capacity` `kevent` records.
    /// The caller owns the returned memory. Not referenced within this class.
    private static func kqChangeList(_ capacity: Int) -> UnsafeMutablePointer<kevent> {
        let ptr = UnsafeMutablePointer<kevent>.allocate(capacity: capacity)
        ptr.initialize(repeating: kevent(), count: capacity)
        return ptr
    }

    /// Thread body: polls all registered ports until the thread is cancelled.
    /// Returns immediately when no port was registered.
    override func main() {
        guard !sockDevs.isEmpty else { return }
        // Single cleanup point. Each device must be closed exactly once: closing
        // twice would close file descriptors that may have been reused since,
        // and would try to delete the fake interfaces a second time.
        defer {
            for dev in sockDevs {
                dev.close()
            }
        }
        let kq = kqueue()
        if kq < 0 {
            fatalError("kqueue() failed: \(String(cString: strerror(errno)))")
        }
        defer { close(kq) }
        let kqs = KQSockets(sockDevs)
        while !isCancelled {
            if kqs.onEvent(kq) < 0 {
                if errno == EINTR || errno == EAGAIN {
                    continue
                }
                NetworkSwitch.logger.error("onEvent() failed: \(String(cString: strerror(errno)))")
            }
        }
    }

    /// Cancels the thread and waits, by polling `isFinished`, until it has ended.
    /// - Parameter pollTimeNanos: sleep between polls, in nanoseconds.
    /// - Throws: whatever `Task.sleep` throws (e.g. on task cancellation).
    func cancelAndJoin(_ pollTimeNanos: UInt64 = 100_000_000) async throws {
        cancel()
        while !isFinished {
            try await Task.sleep(nanoseconds: pollTimeNanos)
        }
    }
}
private struct VSockDev {
let hostInterface: String
let vMac: ether_addr_t
let vmSocket: Int32
let remoteSocket: Int32
let bpfSocket: Int32
let ndrvSocket: Int32
let bpfBufferSize: Int
let bpfReadBuffer: UnsafeMutableRawBufferPointer
let bpfFilter: [bpf_insn]
let fethBridgeSide: String
let fethVmSide: String
let isBridge: Bool
/// Current BPF counters for `bpfSocket`, or zeroed counters when the
/// `BIOCGSTATS` ioctl fails.
var bpfStats: bpf_stat {
    var stats = bpf_stat()
    guard ioctl(bpfSocket, BpfIoctl.BIOCGSTATS, &stats) == 0 else {
        return bpf_stat(bs_recv: 0, bs_drop: 0)
    }
    return stats
}
/// Sets up one switch port for the VM device with MAC `vMac` on `hostBridge`.
///
/// When `hostBridge` is a bridge, a peered fake-ethernet pair is created and
/// the VM side of it is used for capture/injection; otherwise the host
/// interface itself is used. A local datagram socket pair carries frames
/// to and from the VM, a BPF descriptor captures frames addressed to `vMac`
/// or to broadcast, and an NDRV socket injects frames.
/// - Throws: whatever `NetworkInterface.createFakeEthPair()` throws. Socket
///   setup failures trap via `fatalError`.
init(hostBridge: String, vMac: ether_addr_t) throws {
    self.hostInterface = hostBridge
    self.isBridge = NetworkInterface.all.first(where: { $0.name == hostBridge })?.isBridge ?? false
    self.vMac = vMac
    (fethBridgeSide, fethVmSide) = isBridge ? try NetworkInterface.createFakeEthPair() : (hostBridge, hostBridge)
    var fds: [Int32] = [0, 0]
    guard socketpair(PF_LOCAL, SOCK_DGRAM, 0, &fds) == 0 else {
        fatalError("socketpair() failed: \(String(cString: strerror(errno)))")
    }
    (vmSocket, remoteSocket) = (fds[0], fds[1])
    // set buffer size (best effort: the results are intentionally not checked).
    // SO_SNDBUF / SO_RCVBUF take a C `int`, hence Int32 and its size.
    var size = Int32(1024 * 1024 * 8)
    let sizeLen = socklen_t(MemoryLayout<Int32>.size)
    for fd in fds {
        for option in [SO_SNDBUF, SO_RCVBUF] {
            _ = setsockopt(fd, SOL_SOCKET, option, &size, sizeLen)
        }
    }
    self.bpfBufferSize = Int(BPF_MAXBUFSIZE)
    self.bpfReadBuffer = UnsafeMutableRawBufferPointer.allocate(byteCount: bpfBufferSize, alignment: 16)
    let vmacTop2 = UInt32(vMac.octet.0) << 8 | UInt32(vMac.octet.1)
    let vmacBottom4 = UInt32(vMac.octet.2) << 24 | UInt32(vMac.octet.3) << 16 | UInt32(vMac.octet.4) << 8 | UInt32(vMac.octet.5)
    self.bpfFilter = [
        // [0] the following 4 statements do `ether dst host <vMac>`
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_W | BPF_ABS), jt: 0, jf: 0, k: 2), // ld dst_host_ether[2..<6]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 2, k: vmacBottom4), // if == vMac[2..<6], proceed to next else skip-2
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst_host_ether[0..<2] (msb 2 bytes)
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 3, jf: 4, k: vmacTop2), // if == vMac[0..<2], skip-3 (true) else skip-4 (false)
        // [4] the following 3 statements do `ether dst broadcast`
        // (the accumulator still holds dst_host_ether[2..<6] loaded at [0])
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 3, k: 0xffffffff), // if == 0xffffffff (broadcast), next else skip-3 (false)
        // Load the FIRST two bytes here: bytes 2..<6 are already known to be
        // 0xff, so loading offset 2 again would accept any xx:xx:ff:ff:ff:ff.
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst_host_ether[0..<2]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 1, k: 0xffff), // if == 0xffff (broadcast), next (true) else skip-1 (false)
        // [7] return true (capture max packet size)
        bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: UInt32(self.bpfBufferSize)),
        // [8] return false
        bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: 0), // ret false
    ]
    self.ndrvSocket = Self.ndrvSocket(fethVmSide)
    self.bpfSocket = Self.bpfSocket(fethVmSide, self.bpfBufferSize, self.bpfFilter)
}
/// Dispatches `event` to the matching direction based on its descriptor.
/// - Returns: `true` if the event belonged to this port's VM socket or BPF
///   descriptor and was handled, `false` otherwise.
func routeTraffic(_ event: kevent64_s) -> Bool {
    switch Int32(event.ident) {
    case vmSocket:
        vmToHost(event)
    case bpfSocket:
        hostToVM(event)
    default:
        return false
    }
    return true
}
/// Route traffic from host to VM by reading from bpfSocket and writing to vmSocket.
///
/// One `read()` may return several captured records, each consisting of a
/// `bpf_hdr` followed by the captured bytes; every record's payload is written
/// to `vmSocket` as its own datagram. Parsing stops at the first record that
/// is malformed (zero header length, or extending past the bytes read).
func hostToVM(_ event: kevent64_s) {
    let buffer = bpfReadBuffer.baseAddress!
    let len = read(bpfSocket, buffer, bpfBufferSize)
    guard len > 0 else {
        if len == 0 {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: EOF", throttleKey: "h2g-eof")
        } else if errno != EAGAIN && errno != EINTR {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: read() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-read-fail")
        }
        return
    }
    let endPtr = buffer.advanced(by: len)
    var pktPtr = buffer
    while pktPtr < endPtr {
        // for each packet
        let hdr = pktPtr.assumingMemoryBound(to: bpf_hdr.self).pointee
        let hdrLen = Int(hdr.bh_hdrlen)
        let capLen = Int(hdr.bh_caplen)
        // A record with no header length could never advance the cursor.
        guard hdrLen > 0 else {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: zero-length bpf header", throttleKey: "h2g-hdr-zero")
            break
        }
        let nextPktPtr = pktPtr.advanced(by: capLen + hdrLen)
        // Never forward bytes that lie beyond what read() returned.
        guard nextPktPtr <= endPtr else {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: nextPktPtr out of bounds: \(nextPktPtr) > \(endPtr). current pktPtr=\(pktPtr) hdr=\(hdr)", throttleKey: "h2g-next-oob")
            break
        }
        if capLen > 0 {
            let writeLen = write(vmSocket, pktPtr.advanced(by: hdrLen), capLen)
            if writeLen < 0 {
                NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-writ-fail")
            } else if writeLen != capLen {
                NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: partial write", throttleKey: "h2g-writ-partial")
            }
        }
        // Records are padded so that the next header starts on a BPF_ALIGNMENT boundary.
        pktPtr = nextPktPtr.alignedUp(toMultipleOf: BPF_ALIGNMENT)
    }
}
/// Send traffic from VM to host by reading from vmSocket and writing to ndrv socket.
/// - Parameters:
///   - event: the kqueue read event; `event.data` bounds how many bytes are
///     consumed in this call (capped at the size of the shared read buffer).
///   - onlyOne: when `true` (the default) at most one successful
///     read/write pair is performed per call.
func vmToHost(_ event: kevent64_s, onlyOne: Bool = true) {
    let availableLen = min(bpfReadBuffer.count, Int(event.data))
    let basePtr = bpfReadBuffer.baseAddress!
    var offset = 0
    while offset < availableLen {
        let n = read(vmSocket, basePtr, availableLen - offset)
        guard n > 0 else {
            if n == 0 {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: EOF", throttleKey: "g2h-eof")
            } else if errno != EAGAIN && errno != EINTR {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: read() failed: \(String(cString: strerror(errno))): e=\(event)", throttleKey: "g2h-read-fail")
            }
            break
        }
        let len = write(ndrvSocket, basePtr, n)
        if len != n {
            if len < 0 {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: \(String(cString: strerror(errno)))", throttleKey: "g2h-writ-fail")
            } else {
                // write() succeeded but was short. errno is not meaningful on
                // this path, so it is no longer consulted before logging.
                NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: partial write", throttleKey: "g2h-writ-partial")
            }
            break
        }
        offset += n
        if onlyOne {
            break
        }
    }
}
/// Opens the first available `/dev/bpfN` device (N in 1..<256), configures it
/// and binds it to `ifc`.
///
/// Configuration applied, in order: buffer length, immediate mode, do not see
/// locally sent packets, bind to interface, header-complete, promiscuous
/// mode, and finally the capture filter.
/// - Parameters:
///   - ifc: name of the interface to capture from.
///   - buffSize: BPF buffer length to request.
///   - bpfFilter: filter program to install.
/// - Returns: the configured descriptor. Any failure traps via `fatalError`.
static func bpfSocket(_ ifc: String, _ buffSize: Int, _ bpfFilter: [bpf_insn]) -> Int32 {
    // TODO: modify sysctl debug.bpf_maxbufsize and use that size
    for i in 1..<256 {
        let dev = "/dev/bpf\(i)"
        let fd = open(dev, O_RDONLY)
        if fd < 0 {
            // device unavailable; try the next one
            continue
        }
        // These ioctls take a C `u_int`, so pass a 32-bit value.
        // set buffer size
        var arg = UInt32(buffSize)
        guard ioctl(fd, BpfIoctl.BIOCSBLEN, &arg) == 0 else {
            fatalError("bpf \(dev) ioctl(BIOCSBLEN) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // set immediate mode to true
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCIMMEDIATE, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCIMMEDIATE) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // see only received packets, not generated locally
        arg = 0
        guard ioctl(fd, BpfIoctl.BIOCSSEESENT, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCSSEESENT) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // bind to interface
        var ifr = ifreq()
        memset(&ifr, 0, MemoryLayout<ifreq>.size)
        ifc.copyTo(&ifr.ifr_name)
        guard ioctl(fd, BpfIoctl.BIOCSETIF, &ifr) == 0 else {
            fatalError("bpf ioctl(BIOCSETIF) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCSHDRCMPLT, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCSHDRCMPLT) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCPROMISC, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCPROMISC) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // set filter. The instructions are handed to the kernel straight from a
        // local copy of the array, so no manually allocated buffer is left behind.
        var insns = bpfFilter
        let rc = insns.withUnsafeMutableBufferPointer { buf -> Int32 in
            var filter = bpf_program()
            filter.bf_len = UInt32(buf.count)
            filter.bf_insns = buf.baseAddress
            return ioctl(fd, BpfIoctl.BIOCSETFNR, &filter)
        }
        guard rc == 0 else {
            fatalError("bpf ioctl(BIOCSETFNR) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        return fd
    }
    fatalError("bpf open() failed for \(ifc): \(String(cString: strerror(errno)))")
}
/// Creates a raw `PF_NDRV` socket, then binds and connects it to `ifc`.
/// - Parameter ifc: name of the interface to attach to.
/// - Returns: the socket descriptor. Any failure traps via `fatalError`.
static func ndrvSocket(_ ifc: String) -> Int32 {
    let fd = socket(PF_NDRV, SOCK_RAW, 0)
    if fd < 0 {
        fatalError("ndrv socket() failed for \(ifc): \(String(cString: strerror(errno)))")
    }
    // bind to interface
    let addressLen = MemoryLayout<sockaddr_ndrv>.size
    var address = sockaddr_ndrv()
    address.snd_len = UInt8(addressLen)
    address.snd_family = UInt8(AF_NDRV)
    ifc.copyTo(&address.snd_name)
    withUnsafePointer(to: &address) { ndrvPtr in
        ndrvPtr.withMemoryRebound(to: sockaddr.self, capacity: 1) { sa in
            guard Darwin.bind(fd, sa, socklen_t(addressLen)) == 0 else {
                fatalError("ndrv bind() failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            guard Darwin.connect(fd, sa, socklen_t(addressLen)) == 0 else {
                fatalError("ndrv connect() failed for \(ifc): \(String(cString: strerror(errno)))")
            }
        }
    }
    return fd
}
/// Closes all four descriptors of this port and, for bridge ports, deletes
/// both fake-ethernet interfaces (deletion errors are ignored).
func close() {
    for fd in [vmSocket, remoteSocket, bpfSocket, ndrvSocket] {
        Darwin.close(fd)
    }
    guard isBridge else { return }
    for feth in [self.fethBridgeSide, self.fethVmSide] {
        try? NetworkInterface.deleteInterface(feth)
    }
}
}
/// kqueue bookkeeping for a fixed set of switch ports: a change list with two
/// read filters per port (VM socket and BPF descriptor) and an equally sized
/// buffer that receives triggered events.
private struct KQSockets {
    /// Change list handed to every `kevent64` call.
    private let ptr: UnsafeMutablePointer<kevent64_s>
    /// Output buffer filled by `kevent64`.
    private let eventsPtr: UnsafeMutablePointer<kevent64_s>
    private let sockDevs: [VSockDev]

    /// Switches both descriptors of every port to non-blocking mode and
    /// prepares the change list. Traps via `fatalError` if `fcntl` fails.
    init(_ sockDevs: [VSockDev]) {
        self.sockDevs = sockDevs
        let capacity = sockDevs.count * 2
        self.ptr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.ptr.initialize(repeating: kevent64_s(), count: capacity)
        self.eventsPtr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.eventsPtr.initialize(repeating: kevent64_s(), count: capacity)
        for (i, dev) in sockDevs.enumerated() {
            guard Foundation.fcntl(dev.vmSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) vmSocket: \(String(cString: strerror(errno)))")
            }
            guard Foundation.fcntl(dev.bpfSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) bpfSocket: \(String(cString: strerror(errno)))")
            }
            // Slot 2*i watches the VM socket, slot 2*i+1 the BPF descriptor.
            self.ptr[2 * i] = Self.readFilter(for: dev.vmSocket)
            self.ptr[2 * i + 1] = Self.readFilter(for: dev.bpfSocket)
        }
    }

    /// Builds an "add and enable read filter" change record for `fd`.
    private static func readFilter(for fd: Int32) -> kevent64_s {
        return kevent64_s(
            ident: UInt64(fd),
            filter: Int16(EVFILT_READ),
            flags: UInt16(EV_ADD | EV_ENABLE),
            fflags: 0,
            data: 0,
            udata: 0,
            ext: (0, 0)
        )
    }

    /// Waits up to one second for events on `kq` and routes each triggered
    /// event to the port owning the descriptor.
    /// - Returns: the value returned by `kevent64`: the number of events
    ///   received, 0 on timeout, or a negative value on error.
    func onEvent(_ kq: Int32) -> Int {
        let timeoutMillis: Int = 1000
        var timeout = timespec(tv_sec: timeoutMillis / 1000, tv_nsec: (timeoutMillis % 1000) * 1_000_000)
        let len = sockDevs.count * 2
        let numEvents = Int(kevent64(kq, ptr, Int32(len), eventsPtr, Int32(len), 0, &timeout))
        guard numEvents > 0 else {
            return numEvents
        }
        // Only the first `numEvents` slots were written by this call; the rest
        // still hold records from earlier calls and must not be replayed.
        eventLoop: for i in 0..<numEvents {
            let evt = eventsPtr[i]
            if evt.flags & UInt16(EV_ERROR) != 0 {
                NetworkSwitch.logger.error("evt-error: \(String(cString: strerror(Int32(evt.data))))", throttleKey: "kq-evt-error")
            } else if evt.data > 0 {
                let fd = Int32(evt.ident)
                for dev in sockDevs {
                    if dev.vmSocket == fd {
                        dev.vmToHost(evt)
                        continue eventLoop
                    } else if dev.bpfSocket == fd {
                        dev.hostToVM(evt)
                        continue eventLoop
                    }
                }
                NetworkSwitch.logger.error("no route found for event: \(evt)", throttleKey: "kq-no-route")
            }
        }
        return numEvents
    }
}
/// Boundary, in bytes, to which the start of each record in a BPF read buffer
/// is rounded up when stepping from one record to the next.
private let BPF_ALIGNMENT: Int = MemoryLayout<Int32>.size

/// BPF ioctl request numbers (group "B"), rebuilt here with the
/// `_IO` / `_IOR` / `_IOW` / `_IOWR` helpers.
enum BpfIoctl {
    static let BIOCSBLEN: UInt = _IOWR("B", 102, CUnsignedInt.self)
    static let BIOCPROMISC: UInt = _IO("B", 105)
    static let BIOCSETIF: UInt = _IOW("B", 108, ifreq.self)
    static let BIOCGSTATS: UInt = _IOR("B", 111, bpf_stat.self)
    static let BIOCIMMEDIATE: UInt = _IOW("B", 112, CUnsignedInt.self)
    static let BIOCSHDRCMPLT: UInt = _IOW("B", 117, CUnsignedInt.self)
    static let BIOCSSEESENT: UInt = _IOW("B", 119, CUnsignedInt.self)
    static let BIOCSETFNR: UInt = _IOW("B", 126, bpf_program.self)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment