Skip to content

Instantly share code, notes, and snippets.

@hkwi
Last active March 27, 2024 07:22
Show Gist options
  • Save hkwi/9fc7ebc12790ed10ea55ba38e4f86d0e to your computer and use it in GitHub Desktop.
Save hkwi/9fc7ebc12790ed10ea55ba38e4f86d0e to your computer and use it in GitHub Desktop.
Linux vxlan nat traversal example

This example demonstrates VXLAN NAT traversal using UDP hole punching.

         +---------------+
         | (node5) vxlan |
         +---------------+
                  | uplink
       +--------------------+
       | (node4) masquerade |
       +--------------------+
                  |
         +--------+--------+
         |                 |
 +-------+-------+ +-------+-------+
 | (node2) vxlan | | (node3) vxlan |
 +---------------+ +---------------+

Run

  • vagrant up
  • python vxlan_hp.py on node5 (vagrant ssh node5)
  • ping 192.168.5.5 from node2, node3

Limitations

The UDP source port of packets sent from inside the NAT must be the same as the UDP destination port, i.e. 4789, the IANA-assigned VXLAN port.

# Four-VM lab for the VXLAN NAT traversal demo:
#   node2, node3 - VXLAN endpoints on internal network "L" (192.168.30.0/24)
#   node4        - router between "L" and "R" that masquerades 192.168.30.0/24
#   node5        - VXLAN endpoint on internal network "R" (192.168.40.0/24)
#
# The heredoc bodies are kept at column 0 on purpose so that the provisioning
# scripts handed to the shell contain no leading whitespace.
Vagrant.configure(2) do |config|
  config.vm.box = "minimal/xenial64"
  config.vm.synced_folder ".", "/vagrant", disabled: true

  # The two endpoints on network "L" are identical apart from the last
  # octet, which is shared by the underlay (192.168.30.x) and overlay
  # (192.168.5.x) addresses.
  { node2: 2, node3: 3 }.each do |vm_name, host_octet|
    config.vm.define vm_name do |inner|
      inner.vm.network :private_network, ip: "192.168.30.#{host_octet}", virtualbox__intnet: "L"
      # Route to the "R" network via node4, create the vxlan5 device with
      # source port range "4789 4790", and install an all-zero-MAC fdb entry
      # pointing at node5.
      inner.vm.provision :shell, inline: <<-SHELL
ip route add 192.168.40.0/24 via 192.168.30.4
ip link add vxlan5 type vxlan id 5 dstport 4789 srcport 4789 4790
ip addr add 192.168.5.#{host_octet}/24 dev vxlan5
ip link set up vxlan5
bridge fdb add 00:00:00:00:00:00 dev vxlan5 dst 192.168.40.5
SHELL
    end
  end

  # NAT router: one leg on each internal network, masquerading everything
  # that originates from 192.168.30.0/24, with IPv4 forwarding enabled.
  config.vm.define :node4 do |router|
    router.vm.network :private_network, ip: "192.168.30.4", virtualbox__intnet: "L"
    router.vm.network :private_network, ip: "192.168.40.4", virtualbox__intnet: "R"
    router.vm.provision :shell, inline: <<-SHELL
apt-get install -y iptables
iptables -t nat -A POSTROUTING -s 192.168.30.0/24 -j MASQUERADE
sysctl net.ipv4.ip_forward=1
SHELL
  end

  # Endpoint on network "R": uploads the BPF program and its loader, installs
  # bcc-tools from the iovisor repository plus kernel headers and pyroute2,
  # then creates the vxlan5 device. No fdb entry is added on this node.
  config.vm.define :node5 do |outer|
    outer.vm.network :private_network, ip: "192.168.40.5", virtualbox__intnet: "R"
    outer.vm.provision :file, source: "vxlan_hp.py", destination: "vxlan_hp.py"
    outer.vm.provision :file, source: "vxlan_hp.c", destination: "vxlan_hp.c"
    outer.vm.provision :shell, inline: <<-SHELL
echo "deb [trusted=yes] https://repo.iovisor.org/apt/xenial xenial-nightly main" > /etc/apt/sources.list.d/iovisor.list
apt-get update -y
apt-get install -y bcc-tools
apt-get install -y linux-headers-`uname -r` python-setuptools
easy_install pyroute2
ip link add vxlan5 type vxlan id 5 dstport 4789 srcport 4789 4790
ip addr add 192.168.5.5/24 dev vxlan5
ip link set up vxlan5
SHELL
  end
end
#include <bcc/proto.h>
/* Value stored in host2nat for one inner MAC address. */
struct peer4_t {
    u32 vxlan_key;  /* key field copied from the VXLAN header of the packet */
    u16 udp_port;   /* outer UDP source port the packet arrived with */
};

/*
 * Table keyed by inner MAC address (held in a u64): mac -> nat.
 * Written by handle_ingress, read by handle_egress.
 * The declaration must be terminated with ';' — the original omitted it.
 */
BPF_HASH(host2nat, u64, struct peer4_t); // mac -> nat
// Classifier for received frames. For a unicast IPv4/UDP packet whose UDP
// destination port is 4789 and whose UDP source port is NOT 4789, records
//   inner source MAC -> (VXLAN key, outer UDP source port)
// in host2nat. The packet itself is not modified here.
// Returns 1 on every path except when the inner source MAC is zero, where it
// returns 0.
// NOTE(review): how the return value is interpreted depends on how the
// filter is attached — confirm against the loader.
int handle_ingress(struct __sk_buff *skb){
u8 *cursor = 0;
// Outer Ethernet header.
struct ethernet_t *eth = cursor_advance(cursor, sizeof(*eth));
// Ignore frames with bit 40 of the destination MAC set (presumably the
// group/multicast bit — confirm) and anything whose ethertype is not
// 0x800 (IPv4).
if((eth->dst & (1ULL << 40)) || eth->type != 0x800){
return 1;
}
// Outer IP header. The cursor advances by a fixed sizeof(*ip), so this
// assumes a header without options — TODO confirm.
struct ip_t *ip = cursor_advance(cursor, sizeof(*ip));
// Only IP protocol 17 (UDP) is of interest.
if(ip->nextp != 17){
return 1;
}
struct udp_t *udp = cursor_advance(cursor, sizeof(*udp));
// Only packets addressed to UDP port 4789 are considered.
if(udp->dport != 4789){
return 1;
}
// Source port already 4789: nothing to record for this sender.
if(udp->sport == 4789){
return 1;
}
// Source port differs from 4789 (presumably rewritten by a NAT on the
// path): remember it, keyed by the inner source MAC.
struct vxlan_t *vxlan = cursor_advance(cursor, sizeof(*vxlan));
struct ethernet_t *eth_in = cursor_advance(cursor, sizeof(*eth_in));
u64 src = eth_in->src;
// An all-zero inner source MAC is never stored.
if(src == 0){
return 0;
}
struct peer4_t peer = {};
peer.vxlan_key = vxlan->key;
peer.udp_port = udp->sport;
// Insert or overwrite the entry for this MAC.
host2nat.update(&src, &peer);
return 1;
}
// Classifier for transmitted frames. For a unicast IPv4/UDP packet with UDP
// destination port 4789, looks up the inner destination MAC in host2nat;
// when an entry exists and its VXLAN key matches the packet's, rewrites the
// outer UDP destination port to the recorded port and sets the outer UDP
// source port to 4789. Always returns 1.
// NOTE(review): no checksum update is performed here after the rewrite —
// confirm that the outer UDP checksum is not in use on this path.
int handle_egress(struct __sk_buff *skb){
u8 *cursor = 0;
// Outer Ethernet header.
struct ethernet_t *eth = cursor_advance(cursor, sizeof(*eth));
// Ignore frames with bit 40 of the destination MAC set (presumably the
// group/multicast bit — confirm) and anything whose ethertype is not
// 0x800 (IPv4).
if((eth->dst & (1ULL << 40)) || eth->type != 0x800){
return 1;
}
// Outer IP header; fixed-size advance, so IP options are assumed absent —
// TODO confirm.
struct ip_t *ip = cursor_advance(cursor, sizeof(*ip));
// Only IP protocol 17 (UDP) is of interest.
if(ip->nextp != 17){
return 1;
}
struct udp_t *udp = cursor_advance(cursor, sizeof(*udp));
// Only packets addressed to UDP port 4789 are considered.
if(udp->dport != 4789){
return 1;
}
struct vxlan_t *vxlan = cursor_advance(cursor, sizeof(*vxlan));
struct ethernet_t *eth_in = cursor_advance(cursor, sizeof(*eth_in));
u64 dst = eth_in->dst;
// Was a port recorded by handle_ingress for this inner destination MAC?
struct peer4_t *nat = host2nat.lookup(&dst);
// Rewrite only when the stored VXLAN key matches this packet's key.
if(nat != NULL && nat->vxlan_key == vxlan->key){
udp->dport = nat->udp_port;
udp->sport = 4789;
}
return 1;
}
import bcc
import time
from pyroute2 import IPRoute, NetNS, IPDB, NSPopen
# Compile vxlan_hp.c and load both handlers as SCHED_CLS (tc classifier)
# programs.
b = bcc.BPF(src_file="vxlan_hp.c", debug=0)
fin = b.load_func("handle_ingress", bcc.BPF.SCHED_CLS)
fout = b.load_func("handle_egress", bcc.BPF.SCHED_CLS)

ipr = IPRoute()
ipdb = IPDB(nl=ipr)
# The interface name is hard-coded.
# NOTE(review): presumably enp0s8 is the private-network NIC inside the
# node5 VM — confirm if the box or NIC order changes.
ifc = ipdb.interfaces.enp0s8

# Receive side: add an ingress qdisc (handle ffff:) and attach
# handle_ingress to it as a bpf filter.
ipr.tc("add", "ingress", ifc.index, "ffff:")
ipr.tc("add-filter", "bpf", ifc.index, ":1", fd=fin.fd, name=fin.name, parent="ffff:", action="ok", classid=1)

# Transmit side: add an sfq qdisc (handle 1:) and attach handle_egress to it
# as a bpf filter.
ipr.tc("add", "sfq", ifc.index, "1:")
ipr.tc("add-filter", "bpf", ifc.index, ":1", fd=fout.fd, name=fout.name, parent="1:", action="ok", classid=1)

# Nothing left to do after attaching the filters; block until interrupted.
# The loop body must be indented — the original left it at column 0, which is
# an IndentationError.
while True:
    time.sleep(5)
@yvesgagnon
Copy link

Small typo in vxlan_hp.c, ";" missing
replace BPF_HASH(host2nat, u64, struct peer4_t) // mac -> nat
by BPF_HASH(host2nat, u64, struct peer4_t); // mac -> nat

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment