Created
July 28, 2011 13:10
-
-
Save errzey/1111503 to your computer and use it in GitHub Desktop.
Know Thy BPF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bpf filter: "ip" | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 3 | |
(002) ret #96 | |
(003) ret #0 | |
(000) ldh [12] | |
Load half word at packet offset 12 | |
Offset 12 is the eth type. | |
(001) jeq #0x800 jt 2 jf 3 | |
If the eth type is 0x800 take the branch to 2, else 3 | |
(002) ret #96 | |
Return a true value, the packet matched. | |
(003) ret #0 | |
Return a false value, the packet did not match. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bpf filter: ip host 192.168.0.1 | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 7 | |
(002) ld [26] | |
(003) jeq #0xc0a80001 jt 6 jf 4 | |
(004) ld [30] | |
(005) jeq #0xc0a80001 jt 6 jf 7 | |
(006) ret #96 | |
(007) ret #0 | |
(000) ldh [12] | |
Load half word at packet offset 12 | |
Offset 12 is the eth type | |
(001) jeq #0x800 jt 2 jf 7 | |
If the eth type is 0x800 jump to 2, else 7 | |
(002) ld [26] | |
Load the 4 byte value at packet offset 26 | |
This is the source-address within the IP packet. | |
(003) jeq #0xc0a80001 jt 6 jf 4 | |
If the value of the source-address is 0xc0a80001 jump to 6, | |
else jump to 4 | |
(004) ld [30] | |
Load the 4 byte value at packet offset 30. | |
This is the destination-address within the IP packet. | |
(005) jeq #0xc0a80001 jt 6 jf 7 | |
If the value of the destination address is 0xc0a80001 jump to 6, | |
else jump to 7 | |
(006) ret #96 | |
(007) ret #0 | |
Pseudocode: | |
/*
 * Assemble the 4 bytes at p into a 32-bit value, most significant byte first.
 * This matches what the BPF "ld [k]" instruction yields for packet data.
 */
static uint32_t filter_load_word(const unsigned char *p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

/*
 * C rendering of the bytecode generated for "ip host 192.168.0.1".
 *
 * Packet fields are in network (big-endian) byte order, so they are assembled
 * byte by byte instead of being read through a cast pointer: a cast read would
 * be byte-swapped on little-endian hosts and may be a misaligned access.
 *
 * packet: start of the Ethernet frame. Bytes 0..33 may be read, so the caller
 *         must supply at least 34 readable bytes.
 *
 * Returns 96 (the value of "ret #96") when the frame is IPv4 and its source
 * or destination address is 192.168.0.1; returns 0 otherwise.
 */
int filter(const unsigned char *packet)
{
    uint16_t eth_type;
    uint32_t source_address;
    uint32_t destination_address;

    /* (000)-(001): the ethertype is the half word at offset 12. */
    eth_type = (uint16_t)(((unsigned)packet[12] << 8) | packet[13]);
    if (eth_type != 0x800) {
        goto fail;
    }

    /* (002)-(003): IPv4 source address at offset 26. */
    source_address = filter_load_word(&packet[26]);
    if (source_address == 0xc0a80001) {
        goto success;
    }

    /* (004)-(005): IPv4 destination address at offset 30. */
    destination_address = filter_load_word(&packet[30]);
    if (destination_address == 0xc0a80001) {
        goto success;
    }

    goto fail;

success:
    return 96; /* (006) */
fail:
    return 0;  /* (007) */
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bpf filter: tcp src port not 22 | |
(000) ldh [12] | |
(001) jeq #0x86dd jt 2 jf 6 | |
(002) ldb [20] | |
(003) jeq #0x6 jt 4 jf 15 | |
(004) ldh [54] | |
(005) jeq #0x16 jt 14 jf 15 | |
(006) jeq #0x800 jt 7 jf 15 | |
(007) ldb [23] | |
(008) jeq #0x6 jt 9 jf 15 | |
(009) ldh [20] | |
(010) jset #0x1fff jt 15 jf 11 | |
(011) ldxb 4*([14]&0xf) | |
(012) ldh [x + 14] | |
(013) jeq #0x16 jt 14 jf 15 | |
(014) ret #0 | |
(015) ret #96 | |
(000) ldh [12] | |
(001) jeq #0x86dd jt 2 jf 6 | |
If the ethernet type is 0x86dd (ipv6) go to 2 | |
else go to 6 | |
(002) ldb [20] | |
Load the 1 byte value at packet offset 20 (ipv6 next header) | |
(003) jeq #0x6 jt 4 jf 15 | |
If the next header equals 6 (tcp) jump to 4, else jump to 15 | |
(004) ldh [54] | |
Load the half word value from packet offset 54 (tcp source port) | |
(005) jeq #0x16 jt 14 jf 15 | |
If the source port is 0x16 (22) jump to 14, else jump to 15 | |
(006) jeq #0x800 jt 7 jf 15 | |
If the eth type is 0x800 (ipv4) jump to 7, else jump to 15 | |
(007) ldb [23] | |
Load the 1 byte value at packet offset 23 ( ip proto ) | |
(008) jeq #0x6 jt 9 jf 15 | |
If the ip proto equals 6 (tcp) jump to 9, else jump to 15 | |
(009) ldh [20] | |
Load the half word value at packet offset 20 (flags + frag offset) | |
(010) jset #0x1fff jt 15 jf 11 | |
Only look at the last 13 bits of the data | |
0x1fff == 0001 1111 1111 1111 (fragment offset) | |
If any of the data in fragment offset is true, jump to 15, else jump to 11 | |
Essentially, if this packet is a fragment, return true for packet match | |
(011) ldxb 4*([14]&0xf) | |
x = ip header len * 4 | |
In our case lets assume that we have a default size of 20 bytes. | |
(012) ldh [x + 14] | |
Load the half word at packet offset x+14 (in our case x is 20, so | |
the offset is 20 + 14 == 34) | |
(013) jeq #0x16 jt 14 jf 15 | |
If the value of packet offset 34 is 0x16 (tcp source port 22) jump to 14, else | |
jump to 15 | |
(014) ret #0 | |
Return a non-match | |
(015) ret #96 | |
Return a match | |
Pseudocode: | |
/*
 * Assemble the 2 bytes at p into a 16-bit value, most significant byte first.
 * This matches what the BPF "ldh [k]" instruction yields for packet data.
 */
static uint16_t filter_load_half(const unsigned char *p)
{
    return (uint16_t)(((unsigned)p[0] << 8) | p[1]);
}

/*
 * C rendering of the bytecode generated for "tcp src port not 22".
 *
 * Packet fields are in network (big-endian) byte order, so half words are
 * assembled byte by byte rather than read through a cast pointer.
 *
 * packet: start of the Ethernet frame. The IPv6 path reads up to byte 55;
 *         the IPv4 path reads up to byte x + 15, where x is the IPv4 header
 *         length (at most 60). The caller must supply that many bytes.
 *
 * Returns 0 only for a TCP segment whose source port is 22 (IPv6, or an
 * IPv4 packet with a zero fragment offset). Every other frame returns 96,
 * including frames that are neither IPv4 nor IPv6, exactly as the bytecode
 * does.
 */
int filter(const unsigned char *packet)
{
    /* (000): the ethertype is the half word at offset 12. */
    uint16_t eth_type = filter_load_half(&packet[12]);

    if (eth_type == 0x86dd) {
        uint8_t next_header;
        uint16_t source_port;

        /* (002)-(003): IPv6 next header at offset 20. */
        next_header = packet[20];
        if (next_header != 0x6) {
            goto matched;
        }

        /* (004)-(005): TCP source port right after the 40-byte IPv6 header. */
        source_port = filter_load_half(&packet[54]);
        if (source_port != 0x16) {
            goto matched;
        }
        goto not_matched;
    }

    if (eth_type == 0x800) {
        uint8_t ip_proto;
        uint16_t flags_offset;
        unsigned x;
        uint16_t source_port;

        /* (007)-(008): IPv4 protocol at offset 23. */
        ip_proto = packet[23];
        if (ip_proto != 0x6) {
            goto matched;
        }

        /* (009)-(010): a non-zero fragment offset means no TCP header here. */
        flags_offset = filter_load_half(&packet[20]);
        if (flags_offset & 0x1fff) {
            goto matched;
        }

        /* (011): x = IPv4 header length in bytes (IHL field * 4). */
        x = 4u * (packet[14] & 0xfu);

        /* (012)-(013): TCP source port follows the IPv4 header. */
        source_port = filter_load_half(&packet[x + 14]);
        if (source_port != 0x16) {
            goto matched;
        }
        goto not_matched;
    }

    /* Neither IPv6 nor IPv4: falls through to a match, like (006) jf 15. */
matched:
    return 96; /* (015) */
not_matched:
    return 0;  /* (014) */
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
We want to convert the filter as seen in example3 into a raw bpf, using no type | |
expressions other than "ether" | |
bpf: | |
( | |
ether[12:2] == 0x800 && | |
ether[23:1] == 0x6 && ( | |
ether[20:2] & 0x1fff) == 0 && | |
ether[ 14 + ( ( ether[14:1] & 0xf ) * 4 ) : 2] != 0x16 | |
) || ( | |
ether[12:2] == 0x86dd && | |
ether[20:1] == 0x6 && | |
ether[54:2] != 0x16 | |
) | |
resulting bpf bytecode: | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 12 | |
(002) ldb [23] | |
(003) jeq #0x6 jt 4 jf 18 | |
(004) ldh [20] | |
(005) jset #0x1fff jt 18 jf 6 | |
(006) ldb [14] | |
(007) and #0xf | |
(008) mul #4 | |
(009) tax | |
(010) ldh [x + 14] | |
(011) jeq #0x16 jt 18 jf 17 | |
(012) jeq #0x86dd jt 13 jf 18 | |
(013) ldb [20] | |
(014) jeq #0x6 jt 15 jf 18 | |
(015) ldh [54] | |
(016) jeq #0x16 jt 18 jf 17 | |
(017) ret #96 | |
(018) ret #0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Building a bpf to look for authoritative answers from a dns server. | |
We know that a udp header is 8 bytes long, so the payload starts at offset 8. | |
The DNS header looks like this: | |
16 bit identifier | |
1 bit identifier stating whether the packet is a question or answer | |
4 bit query field that states the type of message | |
1 bit signifying whether the query is authoritative or not | |
This is all we need to know to facilitate the bpf. | |
we have a minimum of 8 bits to look at | |
00000000 | |
10000100 <-- the ones represent what we want to look at | |
10000100 == 0x84 | |
udp[10:1] & 0x84 == 0x84 | |
easy enough, now lets only look at authoritative nxdomain's | |
now we need to know | |
16 bit id | |
1 bit qa | |
4 bit type | |
1 bit auth | |
1 bit trunc | |
1 bit recurs | |
1 bit recurs avail | |
3 bit reserved | |
4 bit rcode | |
rcode (response codes) values: | |
0 No error condition. | |
1 Unable to interpret query due to format error. | |
2 Unable to process due to server failure. | |
3 Name in query does not exist. | |
4 Type of query not supported. | |
5 Query refused. | |
udp[10:1] & 0x84 == 0x84 && udp[11:1] & 0xf == 3 | |
This created 14 bytecode instructions. Can we optimize more? | |
sure we can look at 2 bytes instead of one | |
udp[10:2] & 0x840f == 0x8403 | |
essentially we want to only look at the bits that we are interested in | |
1000 0100 0000 0011 | |
but we have to AND with a mask that has all of the last 4 bits set | |
1000 0100 0000 1111 | |
then look for the value of | |
1000 0100 0000 0011 = 0x8403 | |
This results in 11 instructions, 3 less than previous | |
A lot of the time our bpf compiler will do special operations | |
to skip over any type of IP options that could be present, lets | |
try to do our filter above to skip over these instructions. | |
Yeah, we would get funked data if there were ip options present, | |
but in most cases these won't even exist. | |
ether[23:1] == 0x11 && ether[44:2] & 0x840f == 0x8403 | |
this results in 6 instructions! 8 less than what we started with. We sacrifice | |
edge cases for performance |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
precedence and you. | |
Lets take for example the filter string "ip and host 192.168.0.1" | |
This is saying: "Any packet that is IP, and whose source or destination address is 192.168.0.1, returns true." | |
The output is as follows: | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 7 | |
(002) ld [26] | |
(003) jeq #0xc0a80001 jt 6 jf 4 | |
(004) ld [30] | |
(005) jeq #0xc0a80001 jt 6 jf 7 | |
(006) ret #96 | |
(007) ret #0 | |
But we can also try to state the same filter as "ip and src host 192.168.0.1 or dst host 192.168.0.1" | |
resulting in: | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 6 | |
(002) ld [26] | |
(003) jeq #0xc0a80001 jt 10 jf 4 | |
(004) ld [30] | |
(005) jeq #0xc0a80001 jt 10 jf 11 | |
(006) jeq #0x806 jt 8 jf 7 | |
(007) jeq #0x8035 jt 8 jf 11 | |
(008) ld [38] | |
(009) jeq #0xc0a80001 jt 10 jf 11 | |
(010) ret #96 | |
(011) ret #0 | |
Shouldn't these two expressions be the same? Lets look at what is happening in both scenarios. | |
"ip and host 192.168.0.1" | |
... | |
(002) ld [26] | |
(003) jeq #0xc0a80001 jt 6 jf 4 | |
; Load offset 26 (IP Source-address) | |
; compare to 0xc0a80001 (192.168.0.1) | |
; if true go to 6 (return true) else go to 4 | |
(004) ld [30] | |
(005) jeq #0xc0a80001 jt 6 jf 7 | |
; load offset 30 (destination address) | |
; if true go to 6 (true), else return false | |
Simple enough, now lets look at "ip and src host 192.168.0.1 or dst host 192.168.0.1" | |
In reality this is exactly like the first example, but with a little difference. | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 6 | |
; load the ethertype (offset 12) and compare to 0x800 (IP) | |
; if true go to 2 (source/dest check), else go to 6 | |
... | |
(006) jeq #0x806 jt 8 jf 7 | |
(007) jeq #0x8035 jt 8 jf 11 | |
; Since the ethertype is still in the accumulator, the filter goes on | |
; to compare it against other types: | |
; 0x0806 == ARP | |
; 0x8035 == RARP | |
(008) ld [38] | |
; load the ARP/RARP target protocol address (the "destination", offset 38) into the accumulator | |
Interesting because I specifically stated "IP", not arp or rarp. Is this a bug? | |
In short, no. This is due to operator precedence. It is doing exactly what the filter said to do. | |
This is easier to see by looking at it this way: | |
(ip AND source host 192.168.0.1) OR (dst host 192.168.0.1) | |
The fix? | |
"ip and (src host 192.168.0.1 or dst host 192.168.0.1)" | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 7 | |
(002) ld [26] | |
(003) jeq #0xc0a80001 jt 6 jf 4 | |
(004) ld [30] | |
(005) jeq #0xc0a80001 jt 6 jf 7 | |
(006) ret #96 | |
(007) ret #0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'ip and src net (1.1.1.0/24 or 2.2.2.0/24 or 3.3.3.0/24 or 4.4.4.0/24 or 5.5.5.0/24 or 6.6.0.0/16) and dst host 5.5.5.5' | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 23 | |
(002) ld [26] | |
(003) and #0xffffff00 | |
(004) jeq #0x1010100 jt 20 jf 5 | |
(005) ld [26] | |
(006) and #0xffffff00 | |
(007) jeq #0x2020200 jt 20 jf 8 | |
(008) ld [26] | |
(009) and #0xffffff00 | |
(010) jeq #0x3030300 jt 20 jf 11 | |
(011) ld [26] | |
(012) and #0xffffff00 | |
(013) jeq #0x4040400 jt 20 jf 14 | |
(014) ld [26] | |
(015) and #0xffffff00 | |
(016) jeq #0x5050500 jt 20 jf 17 | |
(017) ld [26] | |
(018) and #0xffff0000 | |
(019) jeq #0x6060000 jt 20 jf 23 | |
(020) ld [30] | |
(021) jeq #0x5050505 jt 22 jf 23 | |
(022) ret #96 | |
(023) ret #0 | |
This could be optimized in bytecode like this: | |
'ip and src net (1.1.1.0/24 or 2.2.2.0/24 or 3.3.3.0/24 or 4.4.4.0/24 or 5.5.5.0/24 or 6.6.0.0/16) and dst host 5.5.5.5' | |
(000) ldh [12] | |
(001) jeq #0x800 jt 2 jf 14 | |
(002) ld [26] | |
(003) and #0xffffff00 | |
(004) jeq #0x1010100 jt 11 jf 5 | |
(005) jeq #0x2020200 jt 11 jf 6 | |
(006) jeq #0x3030300 jt 11 jf 7 | |
(007) jeq #0x4040400 jt 11 jf 8 | |
(008) jeq #0x5050500 jt 11 jf 9 | |
(009) and #0xffff0000 | |
(010) jeq #0x6060000 jt 11 jf 14 | |
(011) ld [30] | |
(012) jeq #0x5050505 jt 13 jf 14 | |
(013) ret #96 | |
(014) ret #0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment