Skip to content

Instantly share code, notes, and snippets.

@erickt
Created July 29, 2012 18:40
Show Gist options
  • Save erickt/3200980 to your computer and use it in GitHub Desktop.
Save erickt/3200980 to your computer and use it in GitHub Desktop.
mod url;
mod url_authority;
//#set_loc(1, "url.rl");
//
// URL Parser
// Copyright (c) 2010 J.A. Roberts Tunney
// MIT License
//
// Converted to Rust by Erick Tryzelaar
//
// To compile:
//
// ragel --host-lang=rust url.rl -o url.rs
// ragel --host-lang=rust url_authority.rl -o url_authority.rs
// rustc url.rc
// ./url
//
// To show a diagram of your state machine:
//
// ragel -V -p -o url.dot url.rl
// dot -Tpng -o url.png url.dot
// chrome url.png
//
// ragel -V -p -o url_authority.dot url_authority.rl
// dot -Tpng -o url_authority.png url_authority.dot
// chrome url_authority.png
//
// Reference:
//
// - http://tools.ietf.org/html/rfc3986
//
use std;
import result::{result, ok, err};
import url_authority::{url, parse_authority};
fn dummy() -> url {
{
scheme: ~"", user: ~"", pass: ~"", host: ~"", port: 0,
params: ~"", path: ~"", query: ~"", fragment: ~"",
}
}
//#set_loc(43, "url.rl");
//#set_loc(48, "url.rs");
// Ragel-generated table of flattened action lists for the `url` machine.
// Each list is a count followed by that many action ids; the values in the
// trans-actions and eof-actions tables are offsets into this table, and
// `url_parse` dispatches on the action ids it reads from here.
fn init__url_actions_0() -> ~[i8] {
~[
0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1,
5, 1, 6, 1, 7, 1, 8, 1, 9, 1, 10, 2,
0, 9, 2, 1, 2, 2, 1, 3, 2, 1, 10, 3,
7, 1, 2
]
}
// Ragel-generated table, indexed by state: the offset into the trans-keys
// table at which that state's keys begin.
fn init__url_key_offsets_0() -> ~[i8] {
~[
0, 0, 4, 14, 24, 30, 36, 42, 48, 54, 60, 66,
72, 73, 83, 91, 99, 108, 116, 124
]
}
// Ragel-generated table of the byte values each state compares the current
// input byte against. For every state the single keys come first (searched
// one by one), followed by (low, high) pairs describing inclusive ranges.
fn init__url_trans_keys_0() -> ~[u8] {
~[
65, 90, 97, 122, 43, 58, 45, 46, 48, 57, 65, 90,
97, 122, 37, 47, 60, 127, 0, 32, 34, 35, 62, 63,
48, 57, 65, 70, 97, 102, 48, 57, 65, 70, 97, 102,
48, 57, 65, 70, 97, 102, 48, 57, 65, 70, 97, 102,
48, 57, 65, 70, 97, 102, 48, 57, 65, 70, 97, 102,
48, 57, 65, 70, 97, 102, 48, 57, 65, 70, 97, 102,
47, 34, 35, 37, 47, 60, 62, 63, 127, 0, 32, 37,
60, 62, 127, 0, 32, 34, 35, 37, 60, 62, 127, 0,
32, 34, 35, 34, 35, 37, 60, 62, 63, 127, 0, 32,
34, 35, 37, 60, 62, 127, 0, 32, 34, 35, 37, 60,
62, 127, 0, 32, 34, 35, 37, 47, 60, 62, 63, 127,
0, 32, 0
]
}
// Ragel-generated table, indexed by state: how many single (exact-match)
// keys that state has in the trans-keys table.
fn init__url_single_lengths_0() -> ~[i8] {
~[
0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0,
1, 8, 4, 4, 7, 6, 6, 8
]
}
// Ragel-generated table, indexed by state: how many (low, high) range pairs
// that state has in the trans-keys table, after its single keys.
fn init__url_range_lengths_0() -> ~[i8] {
~[
0, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 1, 2, 2, 1, 1, 1, 1
]
}
// Ragel-generated table, indexed by state: the offset into the indicies
// table at which that state's entries begin.
fn init__url_index_offsets_0() -> ~[i8] {
~[
0, 0, 3, 10, 18, 22, 26, 30, 34, 38, 42, 46,
50, 52, 62, 69, 76, 85, 93, 101
]
}
// Ragel-generated table mapping a state's matched key position (or the slot
// after all of its keys when nothing matched) to a transition id. The
// transition id is then used to index the trans-targs and trans-actions
// tables.
fn init__url_indicies_0() -> ~[i8] {
~[
0, 2, 1, 3, 4, 3, 3, 5, 3, 1, 7, 8,
1, 1, 1, 1, 1, 6, 9, 9, 9, 1, 10, 10,
10, 1, 11, 11, 11, 1, 12, 12, 12, 1, 13, 13,
13, 1, 14, 14, 14, 1, 15, 15, 15, 1, 16, 16,
16, 1, 17, 1, 1, 18, 19, 20, 1, 1, 21, 1,
1, 12, 23, 1, 1, 1, 1, 1, 22, 25, 1, 1,
1, 1, 1, 24, 1, 27, 28, 1, 1, 29, 1, 1,
26, 1, 31, 32, 1, 1, 1, 1, 30, 1, 33, 34,
1, 1, 1, 1, 16, 1, 35, 7, 36, 1, 1, 37,
1, 1, 6, 0
]
}
// Ragel-generated table, indexed by transition id: the state the machine
// moves to when that transition is taken (0 is the error state).
fn init__url_trans_targs_0() -> ~[i8] {
~[
2, 0, 2, 2, 3, 2, 13, 6, 12, 5, 15, 7,
13, 9, 16, 11, 18, 19, 14, 6, 16, 17, 15, 4,
15, 4, 16, 14, 8, 17, 18, 14, 10, 14, 10, 14,
16, 17
]
}
// Ragel-generated table, indexed by transition id: the offset into the
// actions table of the action list to run on that transition. A value of 0
// means the transition has no actions.
fn init__url_trans_actions_0() -> ~[i8] {
~[
29, 0, 26, 5, 13, 7, 1, 1, 0, 9, 11, 0,
0, 9, 11, 0, 0, 0, 15, 0, 35, 15, 26, 3,
5, 0, 5, 17, 0, 17, 1, 23, 1, 19, 0, 0,
26, 0
]
}
// Ragel-generated table, indexed by state: the offset into the actions table
// of the action list to run if the input ends while the machine is in that
// state. Offset 0 holds a count of 0, so such states run nothing at EOF.
fn init__url_eof_actions_0() -> ~[i8] {
~[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 15, 32, 21, 17, 23, 19, 0
]
}
// State `url_parse` starts in.
const url_start: int = 1;
// Lowest-numbered final state: `url_parse` reports an error when it finishes
// in a state below this one.
const url_first_final: int = 13;
// Error state. Note the parse loop compares `cs` against the literal 0
// rather than referring to this constant.
const url_error: int = 0;
// Entry state of the machine's `main` definition (same value as `url_start`).
const url_en_main: int = 1;
//#set_loc(44, "url.rl");
// i parse absolute urls and don't suck at it. i'll parse just about
// any type of url you can think of and give you a human-friendly data
// structure.
//
// this routine takes no more than a few microseconds, is reentrant,
// performs in a predictable manner (for security/soft-realtime,)
// doesn't modify your `data` buffer, and under no circumstances will
// it panic (i hope!)
//
// Arguments:
//   data - the raw bytes of the URL to parse
//
// Returns `ok(url)` with the filled-in record when the state machine ends
// in a final state (`cs >= url_first_final`). Otherwise returns `err` with
// one of: "unexpected eof" (all input consumed but not in a final state),
// "error in url at pos N" (the machine stopped before the end of input),
// "invalid hex" (a percent-escape digit could not be converted), or
// whatever error `parse_authority` reported for the authority section.
fn url_parse(data: ~[u8]) -> result<url, @~str> {
// current state of the machine
let mut cs: int;
// index of the input byte currently being examined
let mut p = 0;
// `pe` is where the input ends; `eof` equals it, so the EOF actions run
// as soon as the whole buffer has been consumed
let mut pe = data.len();
let mut eof = data.len();
// start of the span currently being captured (set by action 0)
let mut mark = 0;
let mut url = dummy();
// this buffer is so we can unescape while we roll
let mut buf = vec::to_mut(vec::from_elem(data.len(), 0));
// `hex` accumulates the value of a %XX escape, `amt` counts the bytes
// currently stored in `buf`
let mut hex = 0;
let mut amt = 0;
//#set_loc(175, "url.rs");
{
cs = url_start;
}
//#set_loc(180, "url.rs");
{
let _url_actions = init__url_actions_0();
let _url_key_offsets = init__url_key_offsets_0();
let _url_trans_keys = init__url_trans_keys_0();
let _url_single_lengths = init__url_single_lengths_0();
let _url_range_lengths = init__url_range_lengths_0();
let _url_index_offsets = init__url_index_offsets_0();
let _url_indicies = init__url_indicies_0();
let _url_trans_targs = init__url_trans_targs_0();
let _url_trans_actions = init__url_trans_actions_0();
let _url_eof_actions = init__url_eof_actions_0();
let mut _klen: int;
let mut _trans = 0;
let mut _acts: int;
let mut _nacts: int;
let mut _keys: int;
// The generated code emulates gotos with a loop that dispatches on
// `_goto_targ`: 0 = entry checks, 1 = look up and take a transition,
// 2 = advance to the next byte, 4 = run EOF actions, 5 = stop.
let mut _goto_targ = 0;
loop {
alt check _goto_targ {
0 {
// nothing to read: go straight to the EOF handling
if p == pe {
_goto_targ = 4;
again;
}
// already in the error state: stop
if cs == 0 {
_goto_targ = 5;
again;
}
_goto_targ = 1;
again;
}
1 {
// Find the transition for data[p] in the current state. The inner
// `loop` runs once; it only exists so `break` can leave the search.
let mut _break_match = false;
loop {
_keys = _url_key_offsets[cs] as int;
_trans = _url_index_offsets[cs] as int;
_klen = _url_single_lengths[cs] as int;
if _klen > 0 {
// binary search over this state's single keys
let mut _lower: int = _keys;
let mut _mid: int;
let mut _upper: int = _keys + _klen - 1;
loop {
if _upper < _lower { break; }
_mid = _lower + ((_upper-_lower) >> 1);
if data[p] < _url_trans_keys[_mid] {
_upper = _mid - 1;
} else if data[p] > _url_trans_keys[_mid] {
_lower = _mid + 1;
} else {
_trans += (_mid - _keys);
_break_match = true;
break;
}
}
if _break_match { break; }
// no single key matched: skip past them in both tables
_keys += _klen;
_trans += _klen;
}
_klen = _url_range_lengths[cs] as int;
if _klen > 0 {
// binary search over this state's (low, high) range pairs; each
// pair takes two slots in the key table
let mut _lower = _keys;
let mut _mid: int;
let mut _upper = _keys + (_klen<<1) - 2;
loop {
if _upper < _lower { break; }
// masking with compl(1) presumably clears the low bit so `_mid`
// stays on the first element of a pair
_mid = _lower + (((_upper-_lower) >> 1) & int::compl(1));
if data[p] < _url_trans_keys[_mid] {
_upper = _mid - 2;
} else if data[p] > _url_trans_keys[_mid+1] {
_lower = _mid + 2;
} else {
_trans += ((_mid - _keys)>>1);
_break_match = true;
break;
}
}
if _break_match { break; }
// no range matched either: `_trans` ends up on the slot after all
// of this state's keyed entries
_trans += _klen;
}
break;
}
// translate the slot into a transition id, move to its target state,
// and run the transition's action list if it has one
_trans = _url_indicies[_trans] as int;
cs = _url_trans_targs[_trans] as int;
if _url_trans_actions[_trans] != 0 {
_acts = _url_trans_actions[_trans] as int;
// first entry of the list is the number of actions that follow
_nacts = _url_actions[_acts] as int;
_acts += 1;
while _nacts > 0 {
_nacts -= 1;
let __acts = _acts;
_acts += 1;
alt check _url_actions[__acts] {
0 {
// remember where the current span starts
//#set_loc(68, "url.rl");
{ mark = p; }
}
1 {
// empty the unescape buffer
//#set_loc(69, "url.rl");
{ amt = 0; }
}
2 {
// copy the current byte into the buffer unchanged
//#set_loc(70, "url.rl");
{ buf[amt] = data[p]; amt += 1; }
}
3 {
// copy the current byte plus 0x20 into the buffer (for ASCII 'A'-'Z'
// this yields the lowercase letter)
//#set_loc(71, "url.rl");
{ buf[amt] = data[p] + 0x20; amt += 1; }
}
4 {
// first hex digit of an escape: keep its value times 16
//#set_loc(73, "url.rl");
{
hex = alt char::to_digit(data[p] as char, 16) {
none { ret err(@~"invalid hex"); }
some(hex) { hex * 16 }
}
}
}
5 {
// second hex digit of an escape: add it and store the decoded byte
//#set_loc(80, "url.rl");
{
hex += alt char::to_digit(data[p] as char, 16) {
none { ret err(@~"invalid hex"); }
some(hex) { hex }
};
buf[amt] = hex as u8;
amt += 1;
}
}
6 {
// the buffered bytes are the scheme
//#set_loc(89, "url.rl");
{
url.scheme = str::from_bytes(buf.slice(0, amt));
}
}
7 {
// hand the raw bytes data[mark..p] to the authority parser, which
// returns the updated record; its errors are passed on unchanged
//#set_loc(93, "url.rl");
{
let v = vec::view(data, mark, p);
let authority = parse_authority(url, v);
if authority.is_err() {
ret err(authority.get_err());
}
url = result::unwrap(authority);
}
}
8 {
// the buffered (unescaped) bytes are the path
//#set_loc(102, "url.rl");
{
url.path = str::from_bytes(buf.slice(0, amt));
}
}
9 {
// the query is taken from the raw input, so it is not unescaped
//#set_loc(106, "url.rl");
{
url.query = str::from_bytes(data.slice(mark, p));
}
}
//#set_loc(344, "url.rs");
}
}
}
_goto_targ = 2;
again;
}
2 {
// stop on the error state, otherwise advance to the next byte
if cs == 0 {
_goto_targ = 5;
again;
}
p += 1;
if p != pe {
_goto_targ = 1;
again;
}
_goto_targ = 4;
again;
}
4 {
// end of input: run the EOF action list of the state we ended in
if p == eof {
let mut __acts = _url_eof_actions[cs] as int;
let mut __nacts = _url_actions[__acts] as int;
__acts += 1;
while __nacts > 0 {
__nacts -= 1;
let ___acts = __acts;
__acts += 1;
alt check _url_actions[___acts] {
0 {
// remember where the current span starts
//#set_loc(68, "url.rl");
{ mark = p; }
}
1 {
// empty the unescape buffer
//#set_loc(69, "url.rl");
{ amt = 0; }
}
7 {
// input ended inside the authority: parse data[mark..p] as one
//#set_loc(93, "url.rl");
{
let v = vec::view(data, mark, p);
let authority = parse_authority(url, v);
if authority.is_err() {
ret err(authority.get_err());
}
url = result::unwrap(authority);
}
}
8 {
// input ended inside the path
//#set_loc(102, "url.rl");
{
url.path = str::from_bytes(buf.slice(0, amt));
}
}
9 {
// input ended inside the query (raw bytes, not unescaped)
//#set_loc(106, "url.rl");
{
url.query = str::from_bytes(data.slice(mark, p));
}
}
10 {
// input ended inside the fragment; it comes from the unescape buffer
//#set_loc(110, "url.rl");
{
url.fragment = str::from_bytes(buf.slice(0, amt));
}
}
//#set_loc(412, "url.rs");
}
}
}
}
// 5 = stop: nothing left to do
5 { }
}
// only reached from targets 4 and 5; every other arm restarts the loop
// with `again`
break;
}
}
//#set_loc(146, "url.rl");
// success only if the machine finished in a final state
if cs < url_first_final {
if p == pe {
err(@~"unexpected eof")
} else {
err(@#fmt("error in url at pos %u", p))
}
} else {
ok(url)
}
}
//////////////////////////////////////////////////////////////////////
// Unit test and a rough timing loop for `url_parse`.
#[cfg(test)]
mod tests {
import std::time;
// Parses every input in the table and asserts that the result equals the
// expected record; a parse error fails the test with the error message.
#[test]
fn test() {
// Each entry is (input url, expected record). Fields that are not listed
// take their value from `dummy()` through `with`.
let data = [(
~"http://user:pass@example.com:80;hello/lol.php?fun#omg",
{
scheme: ~"http",
user: ~"user",
pass: ~"pass",
host: ~"example.com",
port: 80,
params: ~"hello",
path: ~"/lol.php",
query: ~"fun",
fragment: ~"omg",
}
), (
~"a:b",
{
scheme: ~"a",
host: ~"b",
with dummy()
}
), (
// mixed-case scheme is expected to come back lowercased
~"GoPHeR://@example.com@:;/?#",
{
scheme: ~"gopher",
host: ~"@example.com@",
path: ~"/",
with dummy()
}
), (
// bracketed host: the brackets are not part of the expected host
~"ldap://[2001:db8::7]/c=GB?objectClass/?one",
{
scheme: ~"ldap",
host: ~"2001:db8::7",
path: ~"/c=GB",
query: ~"objectClass/?one",
with dummy()
}
), (
~"http://user@example.com",
{
scheme: ~"http",
user: ~"user",
host: ~"example.com",
with dummy()
}
), (
// non-ASCII bytes in user and host; params are expected to stay escaped
~"http://品研发和研发管@☃.com:65000;%20",
{
scheme: ~"http",
user: ~"品研发和研发管",
host: ~"☃.com",
port: 65000,
params: ~"%20",
with dummy()
}
), (
~"https://example.com:80",
{
scheme: ~"https",
host: ~"example.com",
port: 80,
with dummy()
}
), (
// empty authority: host is expected to stay empty
~"file:///etc/passwd",
{
scheme: ~"file",
path: ~"/etc/passwd",
with dummy()
}
), (
~"file:///c:/WINDOWS/clock.avi",
{
scheme: ~"file",
path: ~"/c:/WINDOWS/clock.avi", /* <-- is this kosher? */
with dummy()
}
), (
// the path is expected to be unescaped (%20 becomes a space)
~"file://hostname/path/to/the%20file.txt",
{
scheme: ~"file",
host: ~"hostname",
path: ~"/path/to/the file.txt",
with dummy()
}
), (
~"sip:example.com",
{
scheme: ~"sip",
host: ~"example.com",
with dummy()
}
), (
~"sip:example.com:5060",
{
scheme: ~"sip",
host: ~"example.com",
port: 5060,
with dummy()
}
), (
~"mailto:ditto@pokémon.com",
{
scheme: ~"mailto",
user: ~"ditto",
host: ~"pokémon.com",
with dummy()
}
), (
~"sip:[dead:beef::666]:5060",
{
scheme: ~"sip",
host: ~"dead:beef::666",
port: 5060,
with dummy()
}
), (
~"tel:+12126660420",
{
scheme: ~"tel",
host: ~"+12126660420",
with dummy()
}
), (
// every field is populated; the user is expected to be unescaped
~"sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg",
{
scheme: ~"sip",
user: ~"bob barker",
pass: ~"priceisright",
host: ~"dead:beef::666",
port: 5060,
params: ~"isup-oli=00",
path: ~"/palfun.html",
query: ~"haha",
fragment: ~"omg",
with dummy()
}
), (
// the query is expected to stay escaped (%68 is kept as-is)
~"http://www.google.com/search?%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai=",
{
scheme: ~"http",
host: ~"www.google.com",
path: ~"/search",
query: ~"%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai=",
with dummy()
}
)];
for data.each |data| {
alt data {
(s, expected) {
alt url_parse(str::bytes(s)) {
err(e) { fail *e; }
ok(url) { assert expected == url; }
}
}
}
}
}
// Parses each sample url `rounds` times and prints the mean time per parse
// in nanoseconds. The parse results themselves are discarded.
#[test]
fn benchmark() {
let rounds = 100000;
let urls = [
~"a:a",
~"http://google.com/",
~"sip:jtunney@lobstertech.com",
~"http://user:pass@example.com:80;hello/lol.php?fun#omg",
~"file:///etc/passwd",
];
for urls.each |url| {
let t1 = time::precise_time_ns();
for rounds.times {
url_parse(str::bytes(url));
}
let t2 = time::precise_time_ns();
io::println(#fmt("BENCH parse %s -> %f ns",
url,
((t2 - t1) as float) / (rounds as float)));
}
}
}
//#set_loc(1, "url_authority.rl");
/*
// -*-go-*-
//
// URL Parser
// Copyright (c) 2010 J.A. Roberts Tunney
// MIT License
//
*/
import result::{result, ok, err};
//#set_loc(13, "url_authority.rl");
//#set_loc(18, "url_authority.rs");
// Ragel-generated table of flattened action lists for the `url_authority`
// machine. Each list is a count followed by that many action ids; the values
// in the trans-actions and eof-actions tables are offsets into this table,
// and `parse_authority` dispatches on the action ids it reads from here.
fn init__url_authority_actions_0() -> ~[i8] {
~[
0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1,
11, 1, 12, 2, 1, 2, 2, 5, 7, 2, 6, 8,
2, 6, 11, 2, 9, 10, 3, 0, 9, 10, 3, 1,
6, 8, 3, 5, 7, 12, 3, 5, 11, 2, 3, 6,
8, 12, 3, 12, 5, 7, 3, 12, 6, 8, 4, 1,
6, 8, 12
]
}
// Ragel-generated table, indexed by state: the offset into the trans-keys
// table at which that state's keys begin.
fn init__url_authority_key_offsets_0() -> ~[i16] {
~[
0, 0, 14, 20, 26, 40, 46, 52, 65, 71, 77, 83,
89, 95, 101, 114, 127, 133, 139, 145, 151, 165, 179, 185,
191, 206, 222, 236, 240, 244, 256, 268, 270, 287, 301
]
}
// Ragel-generated table of the byte values each state compares the current
// input byte against. For every state the single keys come first (searched
// one by one), followed by (low, high) pairs describing inclusive ranges.
fn init__url_authority_trans_keys_0() -> ~[u8] {
~[
37, 47, 64, 91, 93, 127, 0, 32, 34, 35, 58, 60,
62, 63, 48, 57, 65, 70, 97, 102, 48, 57, 65, 70,
97, 102, 37, 47, 64, 91, 93, 127, 0, 32, 34, 35,
58, 60, 62, 63, 48, 57, 65, 70, 97, 102, 48, 57,
65, 70, 97, 102, 37, 47, 91, 93, 127, 0, 32, 34,
35, 58, 60, 62, 63, 48, 57, 65, 70, 97, 102, 48,
57, 65, 70, 97, 102, 48, 57, 65, 70, 97, 102, 48,
57, 65, 70, 97, 102, 48, 57, 65, 70, 97, 102, 48,
57, 65, 70, 97, 102, 37, 47, 91, 93, 127, 0, 32,
34, 35, 59, 60, 62, 63, 37, 47, 91, 93, 127, 0,
32, 34, 35, 59, 60, 62, 63, 48, 57, 65, 70, 97,
102, 48, 57, 65, 70, 97, 102, 48, 57, 65, 70, 97,
102, 48, 57, 65, 70, 97, 102, 37, 47, 64, 91, 93,
127, 0, 32, 34, 35, 59, 60, 62, 63, 37, 47, 64,
91, 93, 127, 0, 32, 34, 35, 59, 60, 62, 63, 48,
57, 65, 70, 97, 102, 48, 57, 65, 70, 97, 102, 37,
47, 58, 59, 60, 64, 91, 93, 127, 0, 32, 34, 35,
62, 63, 37, 47, 59, 91, 93, 127, 0, 32, 34, 35,
48, 57, 58, 60, 62, 64, 37, 47, 58, 59, 60, 91,
93, 127, 0, 32, 34, 35, 62, 63, 37, 59, 48, 57,
37, 59, 48, 57, 37, 47, 60, 91, 93, 127, 0, 32,
34, 35, 62, 63, 37, 47, 60, 91, 93, 127, 0, 32,
34, 35, 62, 63, 58, 59, 37, 47, 59, 64, 91, 93,
127, 0, 32, 34, 35, 48, 57, 58, 60, 62, 63, 37,
47, 58, 59, 60, 91, 93, 127, 0, 32, 34, 35, 62,
63, 58, 59, 64, 0
]
}
// Ragel-generated table, indexed by state: how many single (exact-match)
// keys that state has in the trans-keys table.
fn init__url_authority_single_lengths_0() -> ~[i8] {
~[
0, 6, 0, 0, 6, 0, 0, 5, 0, 0, 0, 0,
0, 0, 5, 5, 0, 0, 0, 0, 6, 6, 0, 0,
9, 6, 8, 2, 2, 6, 6, 2, 7, 8, 3
]
}
// Ragel-generated table, indexed by state: how many (low, high) range pairs
// that state has in the trans-keys table, after its single keys.
fn init__url_authority_range_lengths_0() -> ~[i8] {
~[
0, 4, 3, 3, 4, 3, 3, 4, 3, 3, 3, 3,
3, 3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 3,
3, 5, 3, 1, 1, 3, 3, 0, 5, 3, 0
]
}
// Ragel-generated table, indexed by state: the offset into the indicies
// table at which that state's entries begin.
fn init__url_authority_index_offsets_0() -> ~[i16] {
~[
0, 0, 11, 15, 19, 30, 34, 38, 48, 52, 56, 60,
64, 68, 72, 82, 92, 96, 100, 104, 108, 119, 130, 134,
138, 151, 163, 175, 179, 183, 193, 203, 206, 219, 231
]
}
// Ragel-generated table mapping a state's matched key position (or the slot
// after all of its keys when nothing matched) to a transition id. The
// transition id is then used to index the trans-targs and trans-actions
// tables.
fn init__url_authority_indicies_0() -> ~[i8] {
~[
2, 1, 3, 4, 1, 1, 1, 1, 1, 1, 0, 5,
5, 5, 1, 6, 6, 6, 1, 8, 1, 9, 1, 1,
1, 1, 1, 1, 1, 7, 10, 10, 10, 1, 11, 11,
11, 1, 12, 1, 13, 1, 1, 1, 1, 1, 1, 3,
14, 14, 14, 1, 15, 15, 15, 1, 16, 16, 16, 1,
17, 17, 17, 1, 18, 18, 18, 1, 19, 19, 19, 1,
21, 1, 1, 1, 1, 1, 1, 1, 1, 20, 23, 1,
1, 24, 1, 1, 1, 1, 1, 22, 25, 25, 25, 1,
26, 26, 26, 1, 27, 27, 27, 1, 28, 28, 28, 1,
30, 1, 20, 1, 1, 1, 1, 1, 1, 1, 29, 32,
1, 22, 1, 33, 1, 1, 1, 1, 1, 31, 34, 34,
34, 1, 35, 35, 35, 1, 37, 1, 38, 39, 1, 40,
1, 1, 1, 1, 1, 1, 36, 42, 1, 44, 1, 1,
1, 1, 1, 43, 1, 1, 41, 46, 1, 47, 39, 1,
1, 1, 1, 1, 1, 1, 45, 48, 44, 49, 1, 50,
52, 51, 1, 54, 1, 1, 1, 1, 1, 1, 1, 1,
53, 55, 1, 1, 1, 1, 1, 1, 1, 1, 19, 56,
57, 1, 58, 1, 52, 9, 1, 1, 1, 1, 1, 59,
1, 1, 7, 12, 1, 47, 39, 1, 13, 1, 1, 1,
1, 1, 3, 60, 57, 61, 1, 0
]
}
// Ragel-generated table, indexed by transition id: the state the machine
// moves to when that transition is taken (0 is the error state).
fn init__url_authority_trans_targs_0() -> ~[i8] {
~[
24, 0, 2, 26, 20, 3, 24, 4, 5, 7, 6, 4,
8, 14, 9, 26, 11, 28, 13, 30, 15, 16, 15, 16,
31, 17, 15, 19, 32, 21, 22, 21, 22, 34, 23, 21,
24, 2, 25, 29, 33, 4, 18, 32, 29, 26, 8, 27,
10, 28, 10, 28, 29, 30, 12, 12, 27, 29, 18, 32,
25, 7
]
}
// Ragel-generated table, indexed by transition id: the offset into the
// actions table of the action list to run on that transition. A value of 0
// means the transition has no actions.
fn init__url_authority_trans_actions_0() -> ~[i8] {
~[
15, 0, 3, 15, 0, 7, 9, 5, 0, 24, 7, 9,
3, 0, 7, 9, 7, 9, 0, 0, 15, 3, 5, 0,
18, 7, 9, 7, 9, 15, 3, 5, 0, 18, 7, 9,
5, 0, 18, 18, 42, 15, 3, 15, 34, 5, 0, 18,
3, 15, 0, 5, 21, 1, 1, 0, 0, 0, 0, 5,
0, 11
]
}
// Ragel-generated table, indexed by state: the offset into the actions table
// of the action list to run if the input ends while the machine is in that
// state. Offset 0 holds a count of 0, so such states run nothing at EOF.
fn init__url_authority_eof_actions_0() -> ~[i8] {
~[
0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50, 58, 38, 58, 46, 30, 27, 13, 54, 38, 13
]
}
// State `parse_authority` starts in.
const url_authority_start: int = 1;
// Lowest-numbered final state of the machine. NOTE(review): nothing in
// `parse_authority` compares the finishing state against this constant.
const url_authority_first_final: int = 24;
// Error state. Note the parse loop compares `cs` against the literal 0
// rather than referring to this constant.
const url_authority_error: int = 0;
// Entry state of the machine's `main` definition (same value as
// `url_authority_start`).
const url_authority_en_main: int = 1;
//#set_loc(14, "url_authority.rl");
/*
// i parse strings like `alice@pokémon.com`.
//
// sounds simple right? but i also parse stuff like:
//
// bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00
//
// which in actual reality is:
//
// - User: "bob barker"
// - Pass: "priceisright"
// - Host: "dead:beef::666"
// - Port: 5060
// - Params: "isup-oli=00"
//
// which was probably extracted from an absolute url that looked like:
//
// sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg
//
// which was probably extracted from its address form:
//
// "Bob Barker" <sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg>;tag=666
//
// who would have thought this could be so hard ._.
*/
// Record holding the pieces of a parsed url. `parse_authority` fills in
// user, pass, host, port and params; `url_parse` fills in scheme, path,
// query and fragment.
// NOTE(review): host is described as mandatory below, but `parse_authority`
// returns ok for empty input and leaves host empty - confirm what is intended.
type url = {
scheme : ~str, /* http, sip, file, etc. (never blank, always lowercase) */
user : ~str, /* who is you */
pass : ~str, /* for like, logging in */
host : ~str, /* IP 4/6 address or hostname (mandatory) */
port : u16, /* like 80 or 5060 (default 0) */
params : ~str, /* stuff after ';' (NOT UNESCAPED, used in sip) */
path : ~str, /* stuff starting with '/' */
query : ~str, /* stuff after '?' (NOT UNESCAPED) */
fragment : ~str, /* stuff after '#' */
};
// Parse the authority part of a url (`user:pass@host:port;params`) and
// store the pieces in the given record.
//
// Arguments:
//   url  - the record to fill in; it is taken over by this function and
//          handed back inside the result
//   data - the raw bytes of the authority section
//
// Returns `ok(url)` with user, pass, host, port and params updated as far
// as they were present, or `err` with "invalid hex" (bad percent-escape
// digit) or "bad url authority: <data>" (port not accepted by `parse_port`).
//
// NOTE(review): the function ends with an unconditional `ok(url)`; the
// finishing state is never compared with `url_authority_first_final`, so
// input that drives the machine into the error state still yields `ok`.
// Empty input also ends up there, so adding a check would need care.
fn parse_authority(-url: url, data: &[u8]) -> result<url, @~str> {
let mut url <- url;
// current state of the machine
let mut cs: int;
// index of the input byte currently being examined
let mut p = 0;
// `pe` is where the input ends; `eof` equals it, so the EOF actions run
// as soon as the whole buffer has been consumed
let mut pe = data.len();
let mut eof = data.len();
// start of the span currently being captured (set by action 0)
let mut mark = 0;
// temporary holding place for user:pass and/or host:port cuz an
// optional term (user[:pass]) coming before a mandatory term
// (host[:port]) would require backtracking and all that
// evil nondeterministic stuff which ragel seems to hate. (for
// this same reason you're also allowed to use square quotes
// around the username.)
let mut b1 = ~"";
let mut b2 = ~"";
// this buffer is so we can unescape while we roll
let mut buf = vec::to_mut(vec::from_elem(data.len(), 0));
// `hex` accumulates the value of a %XX escape, `amt` counts the bytes
// currently stored in `buf`
let mut hex = 0;
let mut amt = 0;
// Convert the textual port to a number. An empty string gives port 0;
// otherwise the result is none when `uint::from_str` yields none or the
// value is larger than 65535.
fn parse_port(s: ~str) -> option<u16> {
if s != ~"" {
do uint::from_str(s).chain |port| {
if port > 65535 { none } else { some(port as u16) }
}
} else {
some(0)
}
}
//#set_loc(229, "url_authority.rs");
{
cs = url_authority_start;
}
//#set_loc(234, "url_authority.rs");
{
let _url_authority_actions = init__url_authority_actions_0();
let _url_authority_key_offsets = init__url_authority_key_offsets_0();
let _url_authority_trans_keys = init__url_authority_trans_keys_0();
let _url_authority_single_lengths = init__url_authority_single_lengths_0();
let _url_authority_range_lengths = init__url_authority_range_lengths_0();
let _url_authority_index_offsets = init__url_authority_index_offsets_0();
let _url_authority_indicies = init__url_authority_indicies_0();
let _url_authority_trans_targs = init__url_authority_trans_targs_0();
let _url_authority_trans_actions = init__url_authority_trans_actions_0();
let _url_authority_eof_actions = init__url_authority_eof_actions_0();
let mut _klen: int;
let mut _trans = 0;
let mut _acts: int;
let mut _nacts: int;
let mut _keys: int;
// The generated code emulates gotos with a loop that dispatches on
// `_goto_targ`: 0 = entry checks, 1 = look up and take a transition,
// 2 = advance to the next byte, 4 = run EOF actions, 5 = stop.
let mut _goto_targ = 0;
loop {
alt check _goto_targ {
0 {
// nothing to read: go straight to the EOF handling
if p == pe {
_goto_targ = 4;
again;
}
// already in the error state: stop
if cs == 0 {
_goto_targ = 5;
again;
}
_goto_targ = 1;
again;
}
1 {
// Find the transition for data[p] in the current state. The inner
// `loop` runs once; it only exists so `break` can leave the search.
let mut _break_match = false;
loop {
_keys = _url_authority_key_offsets[cs] as int;
_trans = _url_authority_index_offsets[cs] as int;
_klen = _url_authority_single_lengths[cs] as int;
if _klen > 0 {
// binary search over this state's single keys
let mut _lower: int = _keys;
let mut _mid: int;
let mut _upper: int = _keys + _klen - 1;
loop {
if _upper < _lower { break; }
_mid = _lower + ((_upper-_lower) >> 1);
if data[p] < _url_authority_trans_keys[_mid] {
_upper = _mid - 1;
} else if data[p] > _url_authority_trans_keys[_mid] {
_lower = _mid + 1;
} else {
_trans += (_mid - _keys);
_break_match = true;
break;
}
}
if _break_match { break; }
// no single key matched: skip past them in both tables
_keys += _klen;
_trans += _klen;
}
_klen = _url_authority_range_lengths[cs] as int;
if _klen > 0 {
// binary search over this state's (low, high) range pairs; each
// pair takes two slots in the key table
let mut _lower = _keys;
let mut _mid: int;
let mut _upper = _keys + (_klen<<1) - 2;
loop {
if _upper < _lower { break; }
// masking with compl(1) presumably clears the low bit so `_mid`
// stays on the first element of a pair
_mid = _lower + (((_upper-_lower) >> 1) & int::compl(1));
if data[p] < _url_authority_trans_keys[_mid] {
_upper = _mid - 2;
} else if data[p] > _url_authority_trans_keys[_mid+1] {
_lower = _mid + 2;
} else {
_trans += ((_mid - _keys)>>1);
_break_match = true;
break;
}
}
if _break_match { break; }
// no range matched either: `_trans` ends up on the slot after all
// of this state's keyed entries
_trans += _klen;
}
break;
}
// translate the slot into a transition id, move to its target state,
// and run the transition's action list if it has one
_trans = _url_authority_indicies[_trans] as int;
cs = _url_authority_trans_targs[_trans] as int;
if _url_authority_trans_actions[_trans] != 0 {
_acts = _url_authority_trans_actions[_trans] as int;
// first entry of the list is the number of actions that follow
_nacts = _url_authority_actions[_acts] as int;
_acts += 1;
while _nacts > 0 {
_nacts -= 1;
let __acts = _acts;
_acts += 1;
alt check _url_authority_actions[__acts] {
0 {
// remember where the current span starts
//#set_loc(87, "url_authority.rl");
{ mark = p; }
}
1 {
// empty the unescape buffer
//#set_loc(88, "url_authority.rl");
{ amt = 0; }
}
2 {
// copy the current byte into the buffer unchanged
//#set_loc(89, "url_authority.rl");
{
buf[amt] = data[p]; amt += 1; }
}
3 {
// first hex digit of an escape: keep its value times 16
//#set_loc(92, "url_authority.rl");
{
hex = alt char::to_digit(data[p] as char, 16) {
none { ret err(@~"invalid hex"); }
some(hex) { hex * 16 }
}
}
}
4 {
// second hex digit of an escape: add it and store the decoded byte
//#set_loc(99, "url_authority.rl");
{
hex += alt char::to_digit(data[p] as char, 16) {
none { ret err(@~"invalid hex"); }
some(hex) { hex }
};
buf[amt] = hex as u8;
amt += 1;
}
}
5 {
// move the buffered bytes into the first holding slot
//#set_loc(108, "url_authority.rl");
{
b1 = str::from_bytes(buf.slice(0, amt));
amt = 0;
}
}
6 {
// move the buffered bytes into the second holding slot
//#set_loc(113, "url_authority.rl");
{
b2 = str::from_bytes(buf.slice(0, amt));
amt = 0;
}
}
7 {
// the first holding slot turned out to be the host
//#set_loc(118, "url_authority.rl");
{
url.host = copy b1;
amt = 0;
}
}
8 {
// the second holding slot turned out to be the port
//#set_loc(123, "url_authority.rl");
{
alt parse_port(b2) {
none {
ret err(@#fmt("bad url authority: %s",
str::from_bytes(data.slice(0, data.len()))))
}
some(port) { url.port = port; }
}
}
}
11 {
// the two holding slots turned out to be user and password
//#set_loc(144, "url_authority.rl");
{
url.user = copy b1;
url.pass = copy b2;
b2 = ~"";
}
}
//#set_loc(407, "url_authority.rs");
}
}
}
_goto_targ = 2;
again;
}
2 {
// stop on the error state, otherwise advance to the next byte
if cs == 0 {
_goto_targ = 5;
again;
}
p += 1;
if p != pe {
_goto_targ = 1;
again;
}
_goto_targ = 4;
again;
}
4 {
// end of input: run the EOF action list of the state we ended in
if p == eof {
let mut __acts = _url_authority_eof_actions[cs] as int;
let mut __nacts = _url_authority_actions[__acts] as int;
__acts += 1;
while __nacts > 0 {
__nacts -= 1;
let ___acts = __acts;
__acts += 1;
alt check _url_authority_actions[___acts] {
0 {
// remember where the current span starts
//#set_loc(87, "url_authority.rl");
{ mark = p; }
}
1 {
// empty the unescape buffer
//#set_loc(88, "url_authority.rl");
{ amt = 0; }
}
5 {
// move the buffered bytes into the first holding slot
//#set_loc(108, "url_authority.rl");
{
b1 = str::from_bytes(buf.slice(0, amt));
amt = 0;
}
}
6 {
// move the buffered bytes into the second holding slot
//#set_loc(113, "url_authority.rl");
{
b2 = str::from_bytes(buf.slice(0, amt));
amt = 0;
}
}
7 {
// the first holding slot turned out to be the host
//#set_loc(118, "url_authority.rl");
{
url.host = copy b1;
amt = 0;
}
}
8 {
// the second holding slot turned out to be the port
//#set_loc(123, "url_authority.rl");
{
alt parse_port(b2) {
none {
ret err(@#fmt("bad url authority: %s",
str::from_bytes(data.slice(0, data.len()))))
}
some(port) { url.port = port; }
}
}
}
9 {
// params are the raw bytes data[mark..p], so they are not unescaped
//#set_loc(133, "url_authority.rl");
{
let params = str::from_bytes(data.slice(mark, p));
url.params = params;
}
}
10 {
// same as action 9, then finish right away
//#set_loc(138, "url_authority.rl");
{
let params = str::from_bytes(data.slice(mark, p));
url.params = params;
ret ok(url)
}
}
12 {
// Finish up: the first holding slot is the host. If it is empty the
// buffered bytes become the host; otherwise any buffered bytes replace
// the second holding slot, which is then parsed as the port.
//#set_loc(150, "url_authority.rl");
{
url.host = copy b1;
if url.host == ~"" {
url.host = str::from_bytes(buf.slice(0, amt));
} else {
if amt > 0 {
b2 = str::from_bytes(buf.slice(0, amt));
}
alt parse_port(b2) {
none {
ret err(@#fmt("bad url authority: %s",
str::from_bytes(data.slice(0, data.len()))))
}
some(port) { url.port = port; }
}
}
ret ok(url)
}
}
//#set_loc(517, "url_authority.rs");
}
}
}
}
// 5 = stop: nothing left to do
5 { }
}
// only reached from targets 4 and 5; every other arm restarts the loop
// with `again`
break;
}
}
//#set_loc(211, "url_authority.rl");
// reached when no EOF action returned early; see the NOTE(review) above
ok(url)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment