Skip to content

Instantly share code, notes, and snippets.

@little-dude
Created June 21, 2018 18:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save little-dude/ad56d56afcd30ea39fafd807e16f05d1 to your computer and use it in GitHub Desktop.
Save little-dude/ad56d56afcd30ea39fafd807e16f05d1 to your computer and use it in GitHub Desktop.
ipv6 parsing in Rust
use std::fmt;
use std::error::Error;
use std::str::FromStr;
/// An IPv4 network, stored as two 32-bit values.
///
/// NOTE(review): the two fields are presumably the network address and the netmask (or prefix);
/// nothing visible here constructs or reads them, so confirm before relying on that.
#[derive(Debug, Copy, Eq, PartialEq, Hash, Clone)]
pub struct Ipv4Network(u32, u32);
/// An IPv6 network, stored as two 128-bit values.
///
/// NOTE(review): the two fields are presumably the network address and the netmask (or prefix);
/// nothing visible here constructs or reads them, so confirm before relying on that.
#[derive(Debug, Copy, Eq, PartialEq, Hash, Clone)]
pub struct Ipv6Network(u128, u128);
/// Error returned when a string cannot be parsed as an IP address.
///
/// The wrapped `String` is the text that was rejected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MalformedAddress(String);
impl fmt::Display for MalformedAddress {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "malformed address: \"{}\"", self.0)
}
}
impl Error for MalformedAddress {
fn description(&self) -> &str {
"the string cannot be parsed as an IP address"
}
fn cause(&self) -> Option<&Error> {
None
}
}
/// An IPv6 address, stored as a single `u128`.
///
/// The `FromStr` implementation in this file places the first hextet of the textual form in
/// the most significant 16 bits of the value.
#[derive(Debug, Copy, Eq, PartialEq, Hash, Clone)]
pub struct Ipv6Address(u128);
impl FromStr for Ipv6Address {
    type Err = MalformedAddress;

    /// Parse the textual representation of an IPv6 address.
    ///
    /// Three spellings are understood:
    ///
    /// * the full form, eight colon-separated hextets of one to four hexadecimal digits each
    ///   (`1111:2222:3333:4444:5555:6666:7777:8888`);
    /// * the compressed form, where a single `::` stands for one or more all-zero hextets
    ///   (`fe80::1`, `::1`, `ffff::`, `::`);
    /// * the mixed form, where the last 32 bits are written as a dotted IPv4 address
    ///   (`::ffff:192.0.2.1`).
    ///
    /// # Errors
    ///
    /// Returns a `MalformedAddress` carrying the whole input when the string is too short or
    /// too long, contains a character that does not belong there, uses `::` more than once or
    /// where nothing is left to compress, starts or ends with a single `:`, or does not
    /// describe exactly 128 bits.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // The longest accepted input is the mixed form
        // `ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255` (45 bytes); the longest pure
        // hexadecimal form, `1111:2222:3333:4444:5555:6666:7777:8888`, is 39 bytes.
        const MAX_LEN: usize = 45;
        // The shortest accepted input is `::`.
        const MIN_LEN: usize = 2;
        const HEXTETS: usize = 8;

        let malformed = || MalformedAddress(s.into());

        // Work on bytes rather than on UTF-8 characters: every character of a valid address
        // is ASCII, and any byte of a multi-byte character is rejected like any other
        // unexpected byte.
        let bytes = s.as_bytes();
        if bytes.len() > MAX_LEN || bytes.len() < MIN_LEN {
            return Err(malformed());
        }

        let mut offset = 0;
        // Index of the hextet in front of which the `::` was seen, if any.
        let mut ellipsis: Option<usize> = None;

        // An address may start with `::`, but never with a single colon.
        if bytes[0] == b':' {
            if bytes[1] != b':' {
                return Err(malformed());
            }
            if bytes.len() == 2 {
                return Ok(Ipv6Address(0));
            }
            ellipsis = Some(0);
            offset = 2;
        }

        // Hextets are collected in reading order and folded into a `u128` at the very end.
        let mut address = [0u16; HEXTETS];
        let mut hextet_index = 0;

        while offset < bytes.len() {
            let (bytes_read, hextet) = read_hextet(&bytes[offset..]);

            if bytes_read == 0 {
                // No hexadecimal digit here. The only legal explanation is the second colon
                // of a `::`, and an address may contain at most one of those.
                if bytes[offset] != b':' || ellipsis.is_some() {
                    return Err(malformed());
                }
                ellipsis = Some(hextet_index);
                offset += 1;
                continue;
            }

            address[hextet_index] = hextet;
            offset += bytes_read;
            hextet_index += 1;

            // Either the array is full or the input is exhausted; the checks after the loop
            // decide whether that is a success.
            if hextet_index == HEXTETS || offset == bytes.len() {
                break;
            }

            match bytes[offset] {
                b':' => {
                    offset += 1;
                    // A separator must be followed by something: `1::2:` is not an address.
                    if offset == bytes.len() {
                        return Err(malformed());
                    }
                }
                // Mixed notation. The "hextet" just read was really the first decimal octet
                // of the embedded IPv4 address, so that address starts `bytes_read` bytes
                // back:
                //
                //     aaaa:bbbb:cccc:dddd:eeee:ffff:a.b.c.d
                //                                   ^^
                //       hextet we just read --------+|
                //       dot we are handling ---------+
                b'.' => {
                    let ipv4: u32 = Ipv4Address::parse(&bytes[offset - bytes_read..])
                        .map_err(|_| malformed())?;
                    // `a.b` overwrites the slot that was filled by mistake, `c.d` takes the
                    // next one. `hextet_index` is at most 7 here, so both slots exist.
                    address[hextet_index - 1] = (ipv4 >> 16) as u16;
                    address[hextet_index] = (ipv4 & 0xffff) as u16;
                    hextet_index += 1;
                    // The IPv4 parser was handed everything up to the end of the input, so a
                    // successful parse means the whole buffer has been consumed.
                    offset = bytes.len();
                    break;
                }
                _ => return Err(malformed()),
            }
        }

        // Anything left over after eight hextets is garbage.
        if offset < bytes.len() {
            return Err(malformed());
        }

        match ellipsis {
            // Eight explicit hextets leave nothing for `::` to stand for, as in
            // `1111::2222:3333:4444:5555:6666:7777:8888`.
            Some(_) if hextet_index == HEXTETS => return Err(malformed()),
            Some(ellipsis_index) => {
                // The ellipsis accounts for every missing hextet. Move what was read after
                // it to the end of the array, leaving zeros behind.
                let nb_zeros = HEXTETS - hextet_index;
                for index in (ellipsis_index..hextet_index).rev() {
                    address[index + nb_zeros] = address[index];
                    address[index] = 0;
                }
            }
            // Too few hextets and no ellipsis to make up for them.
            None if hextet_index < HEXTETS => return Err(malformed()),
            None => {}
        }

        // First hextet ends up in the most significant 16 bits. Folding avoids spelling out
        // eight shift amounts by hand.
        let value = address
            .iter()
            .fold(0u128, |acc, &hextet| (acc << 16) | u128::from(hextet));
        Ok(Ipv6Address(value))
    }
}
/// An IPv4 address, stored as a single `u32`.
#[derive(Debug, Copy, Eq, PartialEq, Hash, Clone)]
pub struct Ipv4Address(u32);
impl Ipv4Address {
fn parse(_: &[u8]) -> Result<u32, MalformedAddress> {
unimplemented!();
}
}
/// Check whether an ASCII character represents a hexadecimal digit, that is one of `0-9`,
/// `a-f` or `A-F`.
fn is_hex_digit(byte: u8) -> bool {
    byte.is_ascii_hexdigit()
}
/// Convert an ASCII character that represents a hexadecimal digit into the value of that
/// digit (`0` to `15`). Both lowercase and uppercase letters are accepted.
///
/// # Panics
///
/// Panics if `byte` is not an ASCII hexadecimal digit. Callers are expected to have checked
/// the byte beforehand.
fn hex_to_digit(byte: u8) -> u8 {
    match byte {
        b'0'..=b'9' => byte - b'0',
        b'a'..=b'f' => byte - b'a' + 10,
        b'A'..=b'F' => byte - b'A' + 10,
        _ => unreachable!(),
    }
}
/// Read up to four leading ASCII hexadecimal digits from `bytes`.
///
/// Returns `(read, value)`, where `read` is the number of digits consumed and `value` is the
/// number they spell, most significant digit first. Reading stops at the first byte that is
/// not a hexadecimal digit; if `bytes` does not start with one, `(0, 0)` is returned.
fn read_hextet(bytes: &[u8]) -> (usize, u16) {
    bytes
        .iter()
        .take(4)
        .take_while(|&&byte| is_hex_digit(byte))
        // Each new digit pushes the previous ones one nibble to the left. With at most four
        // digits the value always fits in 16 bits.
        .fold((0usize, 0u16), |(read, value), &byte| {
            (read + 1, (value << 4) | u16::from(hex_to_digit(byte)))
        })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `text` as an IPv6 address, panicking if the parser rejects it.
    fn parsed(text: &str) -> Ipv6Address {
        text.parse().unwrap()
    }

    /// `::` is the all-zero address.
    #[test]
    fn test_parse_zero() {
        assert_eq!(parsed("::"), Ipv6Address(0));
    }

    /// A leading ellipsis pads the front of the address with zeros.
    #[test]
    fn test_parse_one() {
        assert_eq!(parsed("::1"), Ipv6Address(1));
    }

    /// The full, zero-stripped and compressed spellings of one link-local address all parse
    /// to the same value.
    #[test]
    fn test_parse_link_local() {
        let expected = Ipv6Address(0xfe800000000000008657e6fe08d55325);
        let spellings = [
            "fe80:0000:0000:0000:8657:e6fe:8d5:5325",
            "fe80:0:0:0:8657:e6fe:8d5:5325",
            "fe80::8657:e6fe:8d5:5325",
        ];
        for spelling in spellings.iter() {
            assert_eq!(parsed(spelling), expected);
        }
    }

    /// A trailing ellipsis pads the end of the address with zeros.
    #[test]
    fn test_parse_trailing_zeros() {
        assert_eq!(parsed("ffff::"), Ipv6Address(0xffff0000000000000000000000000000));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment