Created
June 21, 2018 18:17
-
-
Save little-dude/ad56d56afcd30ea39fafd807e16f05d1 to your computer and use it in GitHub Desktop.
ipv6 parsing in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::fmt; | |
use std::error::Error; | |
use std::str::FromStr; | |
/// An IPv4 network, stored as two raw 32-bit values.
///
/// NOTE(review): the two fields are presumably the network address and the
/// netmask (or prefix) — nothing in this file reads them, so confirm against
/// the intended use before relying on that.
#[derive(Debug, Copy, Eq, PartialEq, Hash, Clone)]
pub struct Ipv4Network(u32, u32);
/// An IPv6 network, stored as two raw 128-bit values.
///
/// NOTE(review): the two fields are presumably the network address and the
/// netmask (or prefix) — nothing in this file reads them, so confirm against
/// the intended use before relying on that.
#[derive(Debug, Copy, Eq, PartialEq, Hash, Clone)]
pub struct Ipv6Network(u128, u128);
/// Error returned when a string cannot be parsed as an IP address.
///
/// The wrapped `String` is the text that was rejected; it is echoed back by
/// the `Display` implementation.
#[derive(Debug)]
pub struct MalformedAddress(String);

impl fmt::Display for MalformedAddress {
    /// Write `malformed address: "<input>"`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "malformed address: \"{}\"", self.0)
    }
}

impl Error for MalformedAddress {
    // `description` is kept so that existing callers still get the same text.
    fn description(&self) -> &str {
        "the string cannot be parsed as an IP address"
    }

    // No `cause`/`source` override: this error never wraps another one, and the
    // trait's default implementations already report `None`.
}
/// An IPv6 address, stored as a single 128-bit integer.
///
/// The `FromStr` implementation places the first (leftmost) hextet of the
/// textual form in the most significant 16 bits.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Ipv6Address(u128);
impl FromStr for Ipv6Address { | |
type Err = MalformedAddress; | |
fn from_str(s: &str) -> Result<Self, Self::Err> { | |
// We'll manipulate bytes instead of UTF-8 characters, because the characters that | |
// represent an IPv6 address are supposed to be ASCII characters. | |
let bytes = s.as_bytes(); | |
// The maximimum length of a string representing an IPv6 is the length of: | |
// | |
// 1111:2222:3333:4444:5555:6666:7777:8888 | |
// | |
// The minimum length of a string representing an IPv6 is the length of: | |
// | |
// :: | |
// | |
if bytes.len() > 38 || bytes.len() < 2 { | |
return Err(MalformedAddress(s.into())); | |
} | |
let mut offset = 0; | |
let mut ellipsis: Option<usize> = None; | |
// Handle the special case where the IP start with "::" | |
if bytes[0] == b':' { | |
if bytes[1] == b':' { | |
if bytes.len() == 2 { | |
return Ok(Ipv6Address(0)); | |
} | |
ellipsis = Some(0); | |
offset += 2; | |
} else { | |
// An IPv6 cannot start with a single column. It must be a double column. | |
// So this is an invalid address | |
return Err(MalformedAddress(s.into())); | |
} | |
} | |
// When dealing with IPv6, it's easier to reason in terms of "hextets" instead of octets. | |
// An IPv6 is 8 hextets. At the end, we'll convert that array into an u128. | |
let mut address: [u16; 8] = [0; 8]; | |
// Keep track of the number of hextets we process | |
let mut hextet_index = 0; | |
loop { | |
if offset == bytes.len() { | |
break; | |
} | |
// Try to read an hextet | |
let (bytes_read, hextet) = read_hextet(&bytes[offset..]); | |
// Handle the case where we could not read an hextet | |
if bytes_read == 0 { | |
match bytes[offset] { | |
// We could not read an hextet because the first character in the slace was ":" | |
// This may be because we have two consecutive columns. | |
b':' => { | |
// Check if already saw an ellipsis. If so, fail parsing, because an IPv6 | |
// can only have one ellipsis. | |
if ellipsis.is_some() { | |
return Err(MalformedAddress(s.into())); | |
} | |
// Otherwise, remember the position of the ellipsis. We'll need that later | |
// to count the number of zeros the ellipsis represents. | |
ellipsis = Some(hextet_index); | |
offset += 1; | |
// Continue and try to read the next hextet | |
continue; | |
} | |
// We now the first character does not represent an hexadecimal digit | |
// (otherwise read_hextet() would have read at least one character), and that | |
// it's not ":", so the string does not represent an IPv6 address | |
_ => return Err(MalformedAddress(s.into())), | |
} | |
} | |
// At this point, we know we read an hextet. | |
address[hextet_index] = hextet; | |
offset += bytes_read; | |
hextet_index += 1; | |
// If this was the last hextet of if we reached the end of the buffer, we should be | |
// done | |
if hextet_index == 8 || offset == bytes.len() { | |
break | |
} | |
// Read the next charachter. After a hextet, we usually expect a column, but there's a special | |
// case for IPv6 that ends with an IPv4. | |
match bytes[offset] { | |
// We saw the column, we can continue | |
b':' => offset += 1, | |
// Handle the special IPv4 case, ie address like. Note that the hextet we just read | |
// is part of that IPv4 address: | |
// | |
// aaaa:bbbb:cccc:dddd:eeee:ffff:a.b.c.d. | |
// ^^ | |
// || | |
// hextet we just read, that ---+| | |
// is actually the first byte of +--- dot we're handling | |
// the ipv4. | |
b'.' => { | |
// The hextet was actually part of the IPv4, so not that we start reading the | |
// IPv4 at `offset - bytes_read`. | |
let ipv4: u32 = Ipv4Address::parse(&bytes[offset-bytes_read..])?.into(); | |
// Replace the hextet we just read by the 16 most significant bits of the | |
// IPv4 address (a.b in the comment above) | |
address[hextet_index - 1] = ((ipv4 & 0xffff_0000) >> 16) as u16; | |
// Set the last hextet to the 16 least significant bits of the IPv4 address | |
// (c.d in the comment above) | |
address[hextet_index] = (ipv4 & 0x0000_ffff) as u16; | |
hextet_index += 1; | |
// After successfully parsing an IPv4, we should be done. | |
// If there are bytes left in the buffer, or if we didn't read enough hextet, | |
// we'll fail later. | |
break; | |
} | |
_ => return Err(MalformedAddress(s.into())), | |
} | |
} // end of loop | |
// If we exited the loop, we should have reached the end of the buffer. | |
// If there are trailing characters, parsing should fail. | |
if offset < bytes.len() { | |
return Err(MalformedAddress(s.into())); | |
} | |
if hextet_index == 8 && ellipsis.is_some() { | |
// We parsed an address that looks like 1111:2222::3333:4444:5555:6666:7777, | |
// ie with an empty ellipsis. | |
return Err(MalformedAddress(s.into())); | |
} | |
// We didn't parse enough hextets, but this may be due to an ellipsis | |
if hextet_index < 8 { | |
if let Some(ellipsis_index) = ellipsis { | |
// Count how many zeros the ellipsis accounts for | |
let nb_zeros = 8 - hextet_index; | |
// Shift the hextet that we read after the ellipsis by the number of zeros | |
for index in (ellipsis_index..hextet_index).rev() { | |
address[index+nb_zeros] = address[index]; | |
address[index] = 0; | |
} | |
} else { | |
return Err(MalformedAddress(s.into())); | |
} | |
} | |
// Build the IPv6 address from the array of hextets | |
return Ok(Ipv6Address( | |
((address[0] as u128) << 112) | |
+ ((address[1] as u128) << 96) | |
+ ((address[2] as u128) << 90) | |
+ ((address[3] as u128) << 64) | |
+ ((address[4] as u128) << 48) | |
+ ((address[5] as u128) << 32) | |
+ ((address[6] as u128) << 16) | |
+ address[7] as u128)) | |
} | |
} | |
/// An IPv4 address, stored as a single 32-bit integer.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Ipv4Address(u32);
impl Ipv4Address { | |
fn parse(_: &[u8]) -> Result<u32, MalformedAddress> { | |
unimplemented!(); | |
} | |
} | |
/// Check whether an ASCII byte represents a hexadecimal digit
/// (`0-9`, `a-f` or `A-F`).
fn is_hex_digit(byte: u8) -> bool {
    byte.is_ascii_hexdigit()
}
/// Convert an ASCII byte that represents a hexadecimal digit into the value
/// of that digit (`0..=15`). Both lowercase and uppercase letters are accepted.
///
/// # Panics
///
/// Panics if `byte` is not one of `0-9`, `a-f` or `A-F`. Callers are expected
/// to check with `is_hex_digit` first.
fn hex_to_digit(byte: u8) -> u8 {
    match byte {
        b'0'..=b'9' => byte - b'0',
        b'a'..=b'f' => byte - b'a' + 10,
        b'A'..=b'F' => byte - b'A' + 10,
        _ => unreachable!("hex_to_digit called with non-hexadecimal byte {:#04x}", byte),
    }
}
/// Read up to four ASCII characters that represent hexadecimal digits, and return their value, as | |
/// well as the number of characters that were read. If not character is read, `(0, 0)` is | |
/// returned. | |
fn read_hextet(bytes: &[u8]) -> (usize, u16) { | |
let mut count = 0; | |
let mut digits: [u8; 4] = [0; 4]; | |
for b in bytes { | |
if is_hex_digit(*b) { | |
digits[count] = hex_to_digit(*b); | |
count += 1; | |
if count == 4 { | |
break; | |
} | |
} else { | |
break; | |
} | |
} | |
if count == 0 { | |
return (0, 0); | |
} | |
let mut shift = (count - 1) * 4; | |
let mut res = 0; | |
for digit in &digits[0..count] { | |
res += (*digit as u16) << shift; | |
if shift >= 4 { | |
shift -= 4; | |
} else { | |
break; | |
} | |
} | |
(count, res) | |
} | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `text` as an IPv6 address, panicking if it is rejected.
    fn parsed(text: &str) -> Ipv6Address {
        Ipv6Address::from_str(text).unwrap()
    }

    /// The unspecified address, written as a bare ellipsis.
    #[test]
    fn test_parse_zero() {
        assert_eq!(parsed("::"), Ipv6Address(0));
    }

    /// A leading ellipsis followed by a single hextet.
    #[test]
    fn test_parse_one() {
        assert_eq!(parsed("::1"), Ipv6Address(1));
    }

    /// The same link-local address in full, zero-trimmed and compressed form.
    #[test]
    fn test_parse_link_local() {
        let expected = Ipv6Address(0xfe80_0000_0000_0000_8657_e6fe_08d5_5325);
        let spellings = [
            "fe80:0000:0000:0000:8657:e6fe:8d5:5325",
            "fe80:0:0:0:8657:e6fe:8d5:5325",
            "fe80::8657:e6fe:8d5:5325",
        ];
        for text in &spellings {
            assert_eq!(parsed(text), expected);
        }
    }

    /// A trailing ellipsis.
    #[test]
    fn test_parse_trailing_zeros() {
        assert_eq!(
            parsed("ffff::"),
            Ipv6Address(0xffff_0000_0000_0000_0000_0000_0000_0000)
        );
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment