jart/sip.rl

## sip.rl
%%{# -*-ragel-*-
#
## SIP Message Parser Definition
#
# The parser should be a linearly complex FSM. It should be able to parse a
# real-world SIP INVITE in less than 30μs. This parser only trades speed in
# favor of friendly data structures, which are big structs linked together.
#
## Implementation Notes
#
# SIP uses a plaintext encoding that is hard to parse correctly.
#
#   o Whitespace can be used liberally in a variety of different ways.
#
#     - Via host:port can have whitespace, e.g. "host \t:  port"
#
#   o UTF-8 is supported in some places but not others.
#
#   o Headers can span multiple lines.
#
#   o Header values can contain comments, e.g. Message: lol (i'm (hidden))
#
#   o Header names are case-insensitive and have shorthand notation.
#
#   o There's ~50 standard headers, many of which have custom parsing rules.
#
#   o URIs can have ;params;like=this
#
#     - Params can belong either to a URI or Addr object, e.g. <sip:uri;param>
#       cf. <sip:uri>;param
#
#     - Addresses may omit angle brackets, in which case params belong to the
#       Addr object.
#
#     - URI params ;are=escaped%20like%22this but params belonging to Addr
#       ;are="escaped like\"this"
#
#     - Backslash escaping is not like C, e.g. \t\n -> tn
#
#     - Address display name can have whitespace without quotes, which is
#       collapsed. Quoted form is not collapsed.
#
#   o Via and address headers can be repeated in two ways: repeating the
#     header, using commas within a single header, or both.
#
# See: http://www.colm.net/files/ragel/ragel-guide-6.9.pdf
# See: https://tools.ietf.org/html/rfc2234

machine sip;

action hold {
	fhold;
}

action break {
	fbreak;
}

action mark {
	mark = p
}

action backtrack {
	fexec mark;
}

action start {
	amt = 0
}

action append {
	buf[amt] = fc
	amt++
}

action space {
	buf[amt] = ' '
	amt++
}

action hexHi {
	hex = unhex(fc) * 16
}

action hexLo {
	hex += unhex(fc)
	buf[amt] = hex
	amt++
}

action Method {
	msg.Method = string(data[mark:p])
}

action VersionMajor {
	msg.VersionMajor = msg.VersionMajor * 10 + (fc - 0x30)
}

action VersionMinor {
	msg.VersionMinor = msg.VersionMinor * 10 + (fc - 0x30)
}

action RequestURI {
	msg.Request, err = ParseURI(data[mark:p])
	if err != nil { return nil, err }
}

action StatusCode {
	msg.Status = msg.Status * 10 + (int(fc) - 0x30)
}

action ReasonPhrase {
	msg.Phrase = string(buf[0:amt])
}

action ViaNew {
	via = new(Via)
}

action Via {
	*viap = via
	viap = &via.Next
	via = nil
}

action ViaProtocol {
	via.Protocol = string(data[mark:p])
}

action ViaVersion {
	via.Version = string(data[mark:p])
}

action ViaTransport {
	via.Transport = string(data[mark:p])
}

action ViaHost {
	via.Host = string(data[mark:p])
}

action ViaPort {
	via.Port = via.Port * 10 + (uint16(fc) - 0x30)
}

action ViaParam {
	via.Param = &Param{name, string(buf[0:amt]), via.Param}
}

action gxh {
	fhold;
	fgoto xheader;
}

action name {
	name = string(data[mark:p])
}

action value {{
	b := data[mark:p - 1]
	if value != nil {
		*value = string(b)
	} else {
		msg.XHeader = &XHeader{name, b, msg.XHeader}
	}
}}

action AddrNew {
	addr = new(Addr)
}

action AddrQuotedDisplay {
	addr.Display = string(buf[0:amt])
}

action AddrUnquotedDisplay {{
	end := p
	for end > mark && whitespacec(data[end - 1]) {
		end--
	}
	addr.Display = string(data[mark:end])
}}

action AddrUri {
	addr.Uri, err = ParseURI(data[mark:p])
	if err != nil { return nil, err }
}

action AddrParam {
	addr.Param = &Param{name, string(buf[0:amt]), addr.Param}
}

action Addr {
	*addrp = addr
	addrp = &addr.Next
	addr = nil
}

action CallID {
	msg.CallID = string(data[mark:p])
}

action ContentLength {
	clen = clen * 10 + (int(fc) - 0x30)
}

action ContentType {
	ctype = string(data[mark:p])
}

action CSeq {
	msg.CSeq = msg.CSeq * 10 + (int(fc) - 0x30)
}

action CSeqMethod {
	msg.CSeqMethod = string(data[mark:p])
}

action Expires {
	msg.Expires = msg.Expires * 10 + (int(fc) - 0x30)
}

action MaxForwards {
	msg.MaxForwards = msg.MaxForwards * 10 + (int(fc) - 0x30)
}

action MinExpires {
	msg.MinExpires = msg.MinExpires * 10 + (int(fc) - 0x30)
}

action goto_addr { fgoto addr; }
action goto_addr_angled { fgoto addr_angled; }
action goto_addr_param { fgoto addr_param; }
action goto_addr_uri { fgoto addr_uri; }
action goto_ctype { fgoto ctype; }
action goto_header { fgoto header; }
action goto_value { fgoto value; }
action goto_via { fgoto via; }
action goto_via_param { fgoto via_param; }

action lookAheadWSP { lookAheadWSP(data, p, pe) }

SP              = " ";
HTAB            = "\t";
CR              = "\r";
LF              = "\n";
DQUOTE          = "\"";
CRLF            = ( CR when !lookAheadWSP ) LF;
WSP             = SP | HTAB;
LWS             = ( WSP* ( CR when lookAheadWSP ) LF )? WSP+;
SWS             = LWS?;

LWSCRLF_append  = ( CR when lookAheadWSP ) @append LF @append;
LWS_append      = ( WSP* @append LWSCRLF_append )? WSP+ @append;

UTF8_CONT       = 0x80..0xBF @append;
UTF8_NONASCII   = 0xC0..0xDF @append UTF8_CONT {1}
                | 0xE0..0xEF @append UTF8_CONT {2}
                | 0xF0..0xF7 @append UTF8_CONT {3}
                | 0xF8..0xFb @append UTF8_CONT {4}
                | 0xFC..0xFD @append UTF8_CONT {5};
UTF8            = 0x21..0x7F @append | UTF8_NONASCII;

mUTF8_CONT      = 0x80..0xBF;
mUTF8_NONASCII  = 0xC0..0xDF mUTF8_CONT {1}
                | 0xE0..0xEF mUTF8_CONT {2}
                | 0xF0..0xF7 mUTF8_CONT {3}
                | 0xF8..0xFb mUTF8_CONT {4}
                | 0xFC..0xFD mUTF8_CONT {5};
mUTF8           = 0x21..0x7F | mUTF8_NONASCII;

# https://tools.ietf.org/html/rfc3261#section-25.1
reserved        = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," ;
mark            = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" ;
unreserved      = alnum | mark ;
tokenc          = alnum | "-" | "." | "!" | "%" | "*" | "_" | "+" | "`"
                | "'" | "~" ;
separators      = "("  | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\"
                | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP
                | HTAB ;
wordc           = alnum | "-" | "." | "!" | "%" | "*" | "_" | "+" | "`"
                | "'" | "~" | "(" | ")" | "<" | ">" | ":" | "\\" | "\""
                | "/" | "[" | "]" | "?" | "{" | "}" ;
schmchars       = alnum | "+" | "-" | "." ;
word            = wordc+;
STAR            = SWS "*" SWS;
SLASH           = SWS "/" SWS;
EQUAL           = SWS "=" SWS;
LPAREN          = SWS "(" SWS;
RPAREN          = SWS ")" SWS;
RAQUOT          = ">" SWS;
LAQUOT          = SWS "<";
COMMA           = SWS "," SWS;
SEMI            = SWS ";" SWS;
COLON           = SWS ":" SWS;
HCOLON          = WSP* ":" SWS;
LDQUOT          = SWS "\"";
RDQUOT          = "\"" SWS;
escaped         = "%" ( xdigit @hexHi ) ( xdigit @hexLo ) ;
ipv4c           = digit | "." ;
ipv6c           = xdigit | "." | ":" ;
hostc           = alnum | "-" | "." ;
token           = tokenc+;
tokenhost       = ( tokenc | "[" | "]" | ":" )+;
reasonc         = UTF8_NONASCII | ( reserved | unreserved | SP | HTAB ) @append;
reasonmc        = escaped | reasonc;
cid             = word ( "@" word )?;
hval            = ( mUTF8 | LWS )* >mark;

schemec         = alnum | "+" | "-" | ".";
scheme          = alpha schemec*;
uric            = reserved | unreserved | "%" | "[" | "]";
uri             = scheme ":" uric+;

# Quoted strings can have just about anything, including backslash escapes,
# which aren't quite as fancy as the ones you'd see in programming.
qdtextc         = 0x21 | 0x23..0x5B | 0x5D..0x7E;
qdtext          = UTF8_NONASCII | LWS_append | qdtextc @append;
quoted_pair     = "\\" ( 0x00..0x09 | 0x0B..0x0C | 0x0E..0x7F ) @append;
quoted_content  = ( qdtext | quoted_pair )* >start;
quoted_string   = DQUOTE quoted_content DQUOTE;
unquoted_string = ( token LWS )+;

# Content Type Parsing
#
# This is easy-peasy. It almost always contains the value "application/sdp".
# We're going to ignore the parameters, because this information is actually
# stored in Msg by way of type interface and we don't support any types that
# take parameters.
ctype_param     = SEMI token EQUAL ( token | quoted_string );
ctype_mime      = ( token "/" token ) >mark %ContentType;
ctype          := ctype_mime ctype_param* CRLF @goto_header;

# Parameter Parsing
#
# Parameters can be used by vias and addresses, but not URIs. They can look
# like=this or like="this". The =value part is optional.
param_name      = token >mark %name;
param_content   = tokenhost @append;
param_value     = param_content | quoted_string;
param           = param_name >start (EQUAL param_value)?;

# Via Parsing
#
# Vias are used to trace SIP hops. It's similar to an address, but with simpler
# syntax. Here's some examples:
#
#   - Via: SIP/2.0/UDP 1.2.3.4:5060;branch=z9hG4bK-d1d81e94a099
#   - Via: SIP/2.0/TLS [feed:a::bee] ;branch="z9hG4bK-doge" ;rport=666
#
# Parsing these is kind of difficult because infinite whitespace is allowed
# between colons, semicolons, commas, and don't forget that lines can
# continue. So we're going to break things down into four separate machines
# that jump between each other.
ViaProtocol     = token >mark %ViaProtocol;
ViaVersion      = token >mark %ViaVersion;
ViaTransport    = token >mark %ViaTransport;
ViaSent         = ViaProtocol SLASH ViaVersion SLASH ViaTransport;
ViaHostIPv4     = ipv4c+ >mark %ViaHost;
ViaHostIPv6     = "[" ipv6c+ >mark %ViaHost "]";
ViaHostName     = hostc+ >mark %ViaHost;
ViaHost         = ViaHostIPv4 | ViaHostIPv6 | ViaHostName;
ViaPort         = digit+ @ViaPort;
via_param_end   = CRLF @ViaParam @Via @goto_header
                | SEMI <: any @ViaParam @hold @start @goto_via_param
                | COMMA <: any @ViaParam @Via @ViaNew @hold @goto_via;
via_param      := param via_param_end;
via_end         = CRLF @Via @goto_header
                | SEMI <: any @hold @start @goto_via_param
                | COMMA <: any @Via @ViaNew @hold @goto_via;
via            := ViaSent LWS ViaHost (COLON ViaPort)? via_end;

# Address Parsing
#
# These can come in the following forms, which can be comma-delimited:
#
#   - Unangled: sip:lol.example;param
#   - Angled: <sip:lol.example;param>;param
#   - Unquoted: oh my goth <sip:boo@lol[feed:a::bee]:5060>
#   - Quoted: "oh my \"goth\"" <sip:lol.example>
#
# In order to tell the unangled and unquoted angled forms apart, we need to
# look for ':' or '<' character and then backtrack to the appropriate machine.
#
# Because Addr and URI can both have parameters, one might wonder what happens
# to them in the unmangled form. Are they owned by URI? Or are they owned by
# Addr? The answer is the latter.
#
# The URIs themselves are parsed by a separate routine. All we do here is
# extract the bytes and pass them along. It would be nice if we could put the
# URI parsing in this file, where the URI parsing is invoked by fcall. But
# that's not possible, because it appears Ragel Go is broken in that regard.
addr_spec          = LAQUOT uri >mark %AddrUri RAQUOT;
addr_display       = quoted_string >start %AddrQuotedDisplay
                   | unquoted_string >mark %AddrUnquotedDisplay;
addr_param_end     = CRLF @AddrParam @Addr @goto_header
                   | SEMI <: any @AddrParam @hold @goto_addr_param
                   | COMMA <: any @AddrParam @Addr @hold @goto_addr;
addr_param        := param addr_param_end;
addr_angled_end    = CRLF @Addr @goto_header
                   | SEMI <: any @hold @goto_addr_param
                   | COMMA <: any @Addr @hold @goto_addr;
addr_angled       := addr_display? addr_spec addr_angled_end;
addr_uri_end       = CRLF %Addr @goto_header
                   | SEMI <: any @hold @goto_addr_param
                   | COMMA <: any @Addr @hold @goto_addr;
addr_uri          := ( uri - ";" ) %AddrUri addr_uri_end;
addr              := [<\"] @AddrNew @hold @goto_addr_angled
                   | unquoted_string >mark "<" @AddrNew @backtrack @goto_addr_angled
                   | scheme >mark ":" @AddrNew @backtrack @goto_addr_uri;

# Address Header Name Definitions
#
# These headers set the addr pointer to tell the 'value' machine where to
# store the value after using ParseAddrBytes().
aname    = ("Contact"i | "m"i) %{addrp=lastAddr(&msg.Contact)}
         | ("From"i | "f"i) %{addrp=lastAddr(&msg.From)}
         | "P-Asserted-Identity"i %{addrp=lastAddr(&msg.PAssertedIdentity)}
         | "Record-Route"i %{addrp=lastAddr(&msg.RecordRoute)}
         | "Remote-Party-ID"i %{addrp=lastAddr(&msg.RemotePartyID)}
         | "Route"i %{addrp=lastAddr(&msg.Route)}
         | ("To"i | "t"i) %{addrp=lastAddr(&msg.To)}
         ;

# String Header Name Definitions
#
# These headers set the value pointer to tell the 'value' machine where to
# store the resulting token string.
sname    = "Accept"i %{value=&msg.Accept}
         | ("Accept-Contact"i | "a"i) %{value=&msg.AcceptContact}
         | "Accept-Encoding"i %{value=&msg.AcceptEncoding}
         | "Accept-Language"i %{value=&msg.AcceptLanguage}
         | ("Allow"i | "u"i) %{value=&msg.Allow}
         | ("Allow-Events"i | "u"i) %{value=&msg.AllowEvents}
         | "Alert-Info"i %{value=&msg.AlertInfo}
         | "Authentication-Info"i %{value=&msg.AuthenticationInfo}
         | "Authorization"i %{value=&msg.Authorization}
         | "Content-Disposition"i %{value=&msg.ContentDisposition}
         | "Content-Language"i %{value=&msg.ContentLanguage}
         | ("Content-Encoding"i | "e"i) %{value=&msg.ContentEncoding}
         | "Call-Info"i %{value=&msg.CallInfo}
         | "Date"i %{value=&msg.Date}
         | "Error-Info"i %{value=&msg.ErrorInfo}
         | ("Event"i | "o"i) %{value=&msg.Event}
         | "In-Reply-To"i %{value=&msg.InReplyTo}
         | "Reply-To"i %{value=&msg.ReplyTo}
         | "MIME-Version"i %{value=&msg.MIMEVersion}
         | "Organization"i %{value=&msg.Organization}
         | "Priority"i %{value=&msg.Priority}
         | "Proxy-Authenticate"i %{value=&msg.ProxyAuthenticate}
         | "Proxy-Authorization"i %{value=&msg.ProxyAuthorization}
         | "Proxy-Require"i %{value=&msg.ProxyRequire}
         | ("Refer-To"i | "r"i) %{value=&msg.ReferTo}
         | ("Referred-By"i | "b"i) %{value=&msg.ReferredBy}
         | "Require"i %{value=&msg.Require}
         | "Retry-After"i %{value=&msg.RetryAfter}
         | "Server"i %{value=&msg.Server}
         | ("Subject"i | "s"i) %{value=&msg.Subject}
         | ("Supported"i | "k"i) %{value=&msg.Supported}
         | "Timestamp"i %{value=&msg.Timestamp}
         | "Unsupported"i %{value=&msg.Unsupported}
         | "User-Agent"i %{value=&msg.UserAgent}
         | "Warning"i %{value=&msg.Warning}
         | "WWW-Authenticate"i %{value=&msg.WWWAuthenticate}
         ;

# Custom Header Definitions
#
# These headers do not jump to the 'value' machine, but instead specify
# their own special type of parsing.
cheader  = ("Call-ID"i | "i"i) $!gxh HCOLON cid >mark %CallID
         | ("Content-Length"i | "l"i) $!gxh HCOLON digit+ >{clen=0} @ContentLength
         | "CSeq"i $!gxh HCOLON (digit+ @CSeq) LWS token >mark %CSeqMethod
         | ("Expires"i | "l"i) $!gxh HCOLON digit+ >{msg.Expires=0} @Expires
         | ("Max-Forwards"i | "l"i) $!gxh HCOLON digit+ >{msg.MaxForwards=0} @MaxForwards
         | ("Min-Expires"i | "l"i) $!gxh HCOLON digit+ >{msg.MinExpires=0} @MinExpires
         ;

# Header Parsing
#
# The header machine parses a single header and then jumps to itself to
# loop. When the final CRLF is observed, we then break out of the Ragel
# parser and let the Go code handle payload extraction.
#
# Parsing standard header names is a prefix trie search in generated code.
# Lookahead to set the mark on the header name. In order to support
# extended headers, we'll use $!gxh to jump to the xheader machine when an
# unrecognized character is detected in the header name.
#
# An independent machine has been created for generic header values, so
# that it doesn't need to be duplicated for each leaf in the prefix
# trie. When the value machine has finished reading a value, it'll be
# parsed and stored based on whether the value/addr pointers are set.
#
# Header values can span multiple lines. Lookahead is used in the LWS
# definition to check for whitespace at the start of the next line upon
# encountering a line feed character, in order to determine if a line
# continuation is present.
#
# In order to concatenate across machines, we use lookahead in conjunction
# with the left-guarded concatenation operator. This pattern works is
# defined as follows: `foo <: any @hold @goto_bar`.
#
# Header names are case insensitive. Each recognized header is assigned to
# a specific field in the Msg data structure. Extended headers are stored
# to a linked list data structure with the casing preserved. This is so
# messages can be reproduced with roughly the same appearance. It is the
# responsibility of the person using Msg.Headers to do case-insensitive
# string comparisons.
value   := hval <: CRLF @value @goto_header;
xheader := token %name HCOLON <: any @{value=nil} @hold @goto_value;
sheader  = cheader <: CRLF @goto_header
         | aname $!gxh HCOLON <: any @{value=nil} @hold @goto_addr
         | sname $!gxh HCOLON <: any @hold @goto_value
         | ("Via"i | "v"i) $!gxh HCOLON <: any @ViaNew @hold @goto_via
         | ("Content-Type"i | "c"i) $!gxh HCOLON <: any @hold @goto_ctype;
header  := CRLF @break
         | tokenc @mark @hold sheader;

# Start Line Parsing
#
# The Request and Response definitions are very straightforward, and the
# main machine is the union of the two. Once the line feed character has
# been observed, we then jump to the header machine.
# SIP Message Parsing
Method          = token >mark %Method;
SIPVersionNo    = digit+ @VersionMajor "." digit+ @VersionMinor;
RequestURI      = ^SP+ >mark %RequestURI;
StatusCode      = ( digit @StatusCode ) {3};
ReasonPhrase    = reasonmc+ >start %ReasonPhrase;
SIPVersion      = "SIP/" SIPVersionNo;
Request         = Method SP RequestURI SP SIPVersion CRLF @goto_header;
Response        = SIPVersion SP StatusCode SP ReasonPhrase CRLF @goto_header;
Message         = Request | Response;

}%%
	%%{# --ragel--
	#
	## SIP Message Parser Definition
	#
	# The parser should be a linearly complex FSM. It should be able to parse a
	# real-world SIP INVITE in less than 30μs. This parser only trades speed in
	# favor of friendly data structures, which are big structs linked together.
	#
	## Implementation Notes
	#
	# SIP uses a plaintext encoding that is hard to parse correctly.
	#
	# o Whitespace can be used liberally in a variety of different ways.
	#
	# - Via host:port can have whitespace, e.g. "host \t: port"
	#
	# o UTF-8 is supported in some places but not others.
	#
	# o Headers can span multiple lines.
	#
	# o Header values can contain comments, e.g. Message: lol (i'm (hidden))
	#
	# o Header names are case-insensitive and have shorthand notation.
	#
	# o There's ~50 standard headers, many of which have custom parsing rules.
	#
	# o URIs can have ;params;like=this
	#
	# - Params can belong either to a URI or Addr object, e.g. <sip:uri;param>
	# cf. <sip:uri>;param
	#
	# - Addresses may omit angle brackets, in which case params belong to the
	# Addr object.
	#
	# - URI params ;are=escaped%20like%22this but params belonging to Addr
	# ;are="escaped like\"this"
	#
	# - Backslash escaping is not like C, e.g. \t\n -> tn
	#
	# - Address display name can have whitespace without quotes, which is
	# collapsed. Quoted form is not collapsed.
	#
	# o Via and address headers can be repeated in two ways: repeating the
	# header, using commas within a single header, or both.
	#
	# See: http://www.colm.net/files/ragel/ragel-guide-6.9.pdf
	# See: https://tools.ietf.org/html/rfc2234

	machine sip;

	action hold {
	fhold;
	}

	action break {
	fbreak;
	}

	action mark {
	mark = p
	}

	action backtrack {
	fexec mark;
	}

	action start {
	amt = 0
	}

	action append {
	buf[amt] = fc
	amt++
	}

	action space {
	buf[amt] = ' '
	amt++
	}

	action hexHi {
	hex = unhex(fc) * 16
	}

	action hexLo {
	hex += unhex(fc)
	buf[amt] = hex
	amt++
	}

	action Method {
	msg.Method = string(data[mark:p])
	}

	action VersionMajor {
	msg.VersionMajor = msg.VersionMajor * 10 + (fc - 0x30)
	}

	action VersionMinor {
	msg.VersionMinor = msg.VersionMinor * 10 + (fc - 0x30)
	}

	action RequestURI {
	msg.Request, err = ParseURI(data[mark:p])
	if err != nil { return nil, err }
	}

	action StatusCode {
	msg.Status = msg.Status * 10 + (int(fc) - 0x30)
	}

	action ReasonPhrase {
	msg.Phrase = string(buf[0:amt])
	}

	action ViaNew {
	via = new(Via)
	}

	action Via {
	*viap = via
	viap = &via.Next
	via = nil
	}

	action ViaProtocol {
	via.Protocol = string(data[mark:p])
	}

	action ViaVersion {
	via.Version = string(data[mark:p])
	}

	action ViaTransport {
	via.Transport = string(data[mark:p])
	}

	action ViaHost {
	via.Host = string(data[mark:p])
	}

	action ViaPort {
	via.Port = via.Port * 10 + (uint16(fc) - 0x30)
	}

	action ViaParam {
	via.Param = &Param{name, string(buf[0:amt]), via.Param}
	}

	action gxh {
	fhold;
	fgoto xheader;
	}

	action name {
	name = string(data[mark:p])
	}

	action value {{
	b := data[mark:p - 1]
	if value != nil {
	*value = string(b)
	} else {
	msg.XHeader = &XHeader{name, b, msg.XHeader}
	}
	}}

	action AddrNew {
	addr = new(Addr)
	}

	action AddrQuotedDisplay {
	addr.Display = string(buf[0:amt])
	}

	action AddrUnquotedDisplay {{
	end := p
	for end > mark && whitespacec(data[end - 1]) {
	end--
	}
	addr.Display = string(data[mark:end])
	}}

	action AddrUri {
	addr.Uri, err = ParseURI(data[mark:p])
	if err != nil { return nil, err }
	}

	action AddrParam {
	addr.Param = &Param{name, string(buf[0:amt]), addr.Param}
	}

	action Addr {
	*addrp = addr
	addrp = &addr.Next
	addr = nil
	}

	action CallID {
	msg.CallID = string(data[mark:p])
	}

	action ContentLength {
	clen = clen * 10 + (int(fc) - 0x30)
	}

	action ContentType {
	ctype = string(data[mark:p])
	}

	action CSeq {
	msg.CSeq = msg.CSeq * 10 + (int(fc) - 0x30)
	}

	action CSeqMethod {
	msg.CSeqMethod = string(data[mark:p])
	}

	action Expires {
	msg.Expires = msg.Expires * 10 + (int(fc) - 0x30)
	}

	action MaxForwards {
	msg.MaxForwards = msg.MaxForwards * 10 + (int(fc) - 0x30)
	}

	action MinExpires {
	msg.MinExpires = msg.MinExpires * 10 + (int(fc) - 0x30)
	}

	action goto_addr { fgoto addr; }
	action goto_addr_angled { fgoto addr_angled; }
	action goto_addr_param { fgoto addr_param; }
	action goto_addr_uri { fgoto addr_uri; }
	action goto_ctype { fgoto ctype; }
	action goto_header { fgoto header; }
	action goto_value { fgoto value; }
	action goto_via { fgoto via; }
	action goto_via_param { fgoto via_param; }

	action lookAheadWSP { lookAheadWSP(data, p, pe) }

	SP = " ";
	HTAB = "\t";
	CR = "\r";
	LF = "\n";
	DQUOTE = "\"";
	CRLF = ( CR when !lookAheadWSP ) LF;
	WSP = SP \| HTAB;
	LWS = ( WSP* ( CR when lookAheadWSP ) LF )? WSP+;
	SWS = LWS?;

	LWSCRLF_append = ( CR when lookAheadWSP ) @append LF @append;
	LWS_append = ( WSP* @append LWSCRLF_append )? WSP+ @append;

	UTF8_CONT = 0x80..0xBF @append;
	UTF8_NONASCII = 0xC0..0xDF @append UTF8_CONT {1}
	\| 0xE0..0xEF @append UTF8_CONT {2}
	\| 0xF0..0xF7 @append UTF8_CONT {3}
	\| 0xF8..0xFb @append UTF8_CONT {4}
	\| 0xFC..0xFD @append UTF8_CONT {5};
	UTF8 = 0x21..0x7F @append \| UTF8_NONASCII;

	mUTF8_CONT = 0x80..0xBF;
	mUTF8_NONASCII = 0xC0..0xDF mUTF8_CONT {1}
	\| 0xE0..0xEF mUTF8_CONT {2}
	\| 0xF0..0xF7 mUTF8_CONT {3}
	\| 0xF8..0xFb mUTF8_CONT {4}
	\| 0xFC..0xFD mUTF8_CONT {5};
	mUTF8 = 0x21..0x7F \| mUTF8_NONASCII;

	# https://tools.ietf.org/html/rfc3261#section-25.1
	reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \| "$" \| "," ;
	mark = "-" \| "_" \| "." \| "!" \| "~" \| "*" \| "'" \| "(" \| ")" ;
	unreserved = alnum \| mark ;
	tokenc = alnum \| "-" \| "." \| "!" \| "%" \| "*" \| "_" \| "+" \| "`"
	\| "'" \| "~" ;
	separators = "(" \| ")" \| "<" \| ">" \| "@" \| "," \| ";" \| ":" \| "\\"
	\| "\"" \| "/" \| "[" \| "]" \| "?" \| "=" \| "{" \| "}" \| SP
	\| HTAB ;
	wordc = alnum \| "-" \| "." \| "!" \| "%" \| "*" \| "_" \| "+" \| "`"
	\| "'" \| "~" \| "(" \| ")" \| "<" \| ">" \| ":" \| "\\" \| "\""
	\| "/" \| "[" \| "]" \| "?" \| "{" \| "}" ;
	schmchars = alnum \| "+" \| "-" \| "." ;
	word = wordc+;
	STAR = SWS "*" SWS;
	SLASH = SWS "/" SWS;
	EQUAL = SWS "=" SWS;
	LPAREN = SWS "(" SWS;
	RPAREN = SWS ")" SWS;
	RAQUOT = ">" SWS;
	LAQUOT = SWS "<";
	COMMA = SWS "," SWS;
	SEMI = SWS ";" SWS;
	COLON = SWS ":" SWS;
	HCOLON = WSP* ":" SWS;
	LDQUOT = SWS "\"";
	RDQUOT = "\"" SWS;
	escaped = "%" ( xdigit @hexHi ) ( xdigit @hexLo ) ;
	ipv4c = digit \| "." ;
	ipv6c = xdigit \| "." \| ":" ;
	hostc = alnum \| "-" \| "." ;
	token = tokenc+;
	tokenhost = ( tokenc \| "[" \| "]" \| ":" )+;
	reasonc = UTF8_NONASCII \| ( reserved \| unreserved \| SP \| HTAB ) @append;
	reasonmc = escaped \| reasonc;
	cid = word ( "@" word )?;
	hval = ( mUTF8 \| LWS )* >mark;

	schemec = alnum \| "+" \| "-" \| ".";
	scheme = alpha schemec*;
	uric = reserved \| unreserved \| "%" \| "[" \| "]";
	uri = scheme ":" uric+;

	# Quoted strings can have just about anything, including backslash escapes,
	# which aren't quite as fancy as the ones you'd see in programming.
	qdtextc = 0x21 \| 0x23..0x5B \| 0x5D..0x7E;
	qdtext = UTF8_NONASCII \| LWS_append \| qdtextc @append;
	quoted_pair = "\\" ( 0x00..0x09 \| 0x0B..0x0C \| 0x0E..0x7F ) @append;
	quoted_content = ( qdtext \| quoted_pair )* >start;
	quoted_string = DQUOTE quoted_content DQUOTE;
	unquoted_string = ( token LWS )+;

	# Content Type Parsing
	#
	# This is easy-peasy. It almost always contains the value "application/sdp".
	# We're going to ignore the parameters, because this information is actually
	# stored in Msg by way of type interface and we don't support any types that
	# take parameters.
	ctype_param = SEMI token EQUAL ( token \| quoted_string );
	ctype_mime = ( token "/" token ) >mark %ContentType;
	ctype := ctype_mime ctype_param* CRLF @goto_header;

	# Parameter Parsing
	#
	# Parameters can be used by vias and addresses, but not URIs. They can look
	# like=this or like="this". The =value part is optional.
	param_name = token >mark %name;
	param_content = tokenhost @append;
	param_value = param_content \| quoted_string;
	param = param_name >start (EQUAL param_value)?;

	# Via Parsing
	#
	# Vias are used to trace SIP hops. It's similar to an address, but with simpler
	# syntax. Here's some examples:
	#
	# - Via: SIP/2.0/UDP 1.2.3.4:5060;branch=z9hG4bK-d1d81e94a099
	# - Via: SIP/2.0/TLS [feed:a::bee] ;branch="z9hG4bK-doge" ;rport=666
	#
	# Parsing these is kind of difficult because infinite whitespace is allowed
	# between colons, semicolons, commas, and don't forget that lines can
	# continue. So we're going to break things down into four separate machines
	# that jump between each other.
	ViaProtocol = token >mark %ViaProtocol;
	ViaVersion = token >mark %ViaVersion;
	ViaTransport = token >mark %ViaTransport;
	ViaSent = ViaProtocol SLASH ViaVersion SLASH ViaTransport;
	ViaHostIPv4 = ipv4c+ >mark %ViaHost;
	ViaHostIPv6 = "[" ipv6c+ >mark %ViaHost "]";
	ViaHostName = hostc+ >mark %ViaHost;
	ViaHost = ViaHostIPv4 \| ViaHostIPv6 \| ViaHostName;
	ViaPort = digit+ @ViaPort;
	via_param_end = CRLF @ViaParam @Via @goto_header
	\| SEMI <: any @ViaParam @hold @start @goto_via_param
	\| COMMA <: any @ViaParam @Via @ViaNew @hold @goto_via;
	via_param := param via_param_end;
	via_end = CRLF @Via @goto_header
	\| SEMI <: any @hold @start @goto_via_param
	\| COMMA <: any @Via @ViaNew @hold @goto_via;
	via := ViaSent LWS ViaHost (COLON ViaPort)? via_end;

	# Address Parsing
	#
	# These can come in the following forms, which can be comma-delimited:
	#
	# - Unangled: sip:lol.example;param
	# - Angled: <sip:lol.example;param>;param
	# - Unquoted: oh my goth <sip:boo@lol[feed:a::bee]:5060>
	# - Quoted: "oh my \"goth\"" <sip:lol.example>
	#
	# In order to tell the unangled and unquoted angled forms apart, we need to
	# look for ':' or '<' character and then backtrack to the appropriate machine.
	#
	# Because Addr and URI can both have parameters, one might wonder what happens
	# to them in the unmangled form. Are they owned by URI? Or are they owned by
	# Addr? The answer is the latter.
	#
	# The URIs themselves are parsed by a separate routine. All we do here is
	# extract the bytes and pass them along. It would be nice if we could put the
	# URI parsing in this file, where the URI parsing is invoked by fcall. But
	# that's not possible, because it appears Ragel Go is broken in that regard.
	addr_spec = LAQUOT uri >mark %AddrUri RAQUOT;
	addr_display = quoted_string >start %AddrQuotedDisplay
	\| unquoted_string >mark %AddrUnquotedDisplay;
	addr_param_end = CRLF @AddrParam @Addr @goto_header
	\| SEMI <: any @AddrParam @hold @goto_addr_param
	\| COMMA <: any @AddrParam @Addr @hold @goto_addr;
	addr_param := param addr_param_end;
	addr_angled_end = CRLF @Addr @goto_header
	\| SEMI <: any @hold @goto_addr_param
	\| COMMA <: any @Addr @hold @goto_addr;
	addr_angled := addr_display? addr_spec addr_angled_end;
	addr_uri_end = CRLF %Addr @goto_header
	\| SEMI <: any @hold @goto_addr_param
	\| COMMA <: any @Addr @hold @goto_addr;
	addr_uri := ( uri - ";" ) %AddrUri addr_uri_end;
	addr := [<\"] @AddrNew @hold @goto_addr_angled
	\| unquoted_string >mark "<" @AddrNew @backtrack @goto_addr_angled
	\| scheme >mark ":" @AddrNew @backtrack @goto_addr_uri;

	# Address Header Name Definitions
	#
	# These headers set the addr pointer to tell the 'value' machine where to
	# store the value after using ParseAddrBytes().
	aname = ("Contact"i \| "m"i) %{addrp=lastAddr(&msg.Contact)}
	\| ("From"i \| "f"i) %{addrp=lastAddr(&msg.From)}
	\| "P-Asserted-Identity"i %{addrp=lastAddr(&msg.PAssertedIdentity)}
	\| "Record-Route"i %{addrp=lastAddr(&msg.RecordRoute)}
	\| "Remote-Party-ID"i %{addrp=lastAddr(&msg.RemotePartyID)}
	\| "Route"i %{addrp=lastAddr(&msg.Route)}
	\| ("To"i \| "t"i) %{addrp=lastAddr(&msg.To)}
	;

	# String Header Name Definitions
	#
	# These headers set the value pointer to tell the 'value' machine where to
	# store the resulting token string.
	sname = "Accept"i %{value=&msg.Accept}
	\| ("Accept-Contact"i \| "a"i) %{value=&msg.AcceptContact}
	\| "Accept-Encoding"i %{value=&msg.AcceptEncoding}
	\| "Accept-Language"i %{value=&msg.AcceptLanguage}
	\| ("Allow"i \| "u"i) %{value=&msg.Allow}
	\| ("Allow-Events"i \| "u"i) %{value=&msg.AllowEvents}
	\| "Alert-Info"i %{value=&msg.AlertInfo}
	\| "Authentication-Info"i %{value=&msg.AuthenticationInfo}
	\| "Authorization"i %{value=&msg.Authorization}
	\| "Content-Disposition"i %{value=&msg.ContentDisposition}
	\| "Content-Language"i %{value=&msg.ContentLanguage}
	\| ("Content-Encoding"i \| "e"i) %{value=&msg.ContentEncoding}
	\| "Call-Info"i %{value=&msg.CallInfo}
	\| "Date"i %{value=&msg.Date}
	\| "Error-Info"i %{value=&msg.ErrorInfo}
	\| ("Event"i \| "o"i) %{value=&msg.Event}
	\| "In-Reply-To"i %{value=&msg.InReplyTo}
	\| "Reply-To"i %{value=&msg.ReplyTo}
	\| "MIME-Version"i %{value=&msg.MIMEVersion}
	\| "Organization"i %{value=&msg.Organization}
	\| "Priority"i %{value=&msg.Priority}
	\| "Proxy-Authenticate"i %{value=&msg.ProxyAuthenticate}
	\| "Proxy-Authorization"i %{value=&msg.ProxyAuthorization}
	\| "Proxy-Require"i %{value=&msg.ProxyRequire}
	\| ("Refer-To"i \| "r"i) %{value=&msg.ReferTo}
	\| ("Referred-By"i \| "b"i) %{value=&msg.ReferredBy}
	\| "Require"i %{value=&msg.Require}
	\| "Retry-After"i %{value=&msg.RetryAfter}
	\| "Server"i %{value=&msg.Server}
	\| ("Subject"i \| "s"i) %{value=&msg.Subject}
	\| ("Supported"i \| "k"i) %{value=&msg.Supported}
	\| "Timestamp"i %{value=&msg.Timestamp}
	\| "Unsupported"i %{value=&msg.Unsupported}
	\| "User-Agent"i %{value=&msg.UserAgent}
	\| "Warning"i %{value=&msg.Warning}
	\| "WWW-Authenticate"i %{value=&msg.WWWAuthenticate}
	;

	# Custom Header Definitions
	#
	# These headers do not jump to the 'value' machine, but instead specify
	# their own special type of parsing.
	cheader = ("Call-ID"i \| "i"i) $!gxh HCOLON cid >mark %CallID
	\| ("Content-Length"i \| "l"i) $!gxh HCOLON digit+ >{clen=0} @ContentLength
	\| "CSeq"i $!gxh HCOLON (digit+ @CSeq) LWS token >mark %CSeqMethod
	\| ("Expires"i \| "l"i) $!gxh HCOLON digit+ >{msg.Expires=0} @Expires
	\| ("Max-Forwards"i \| "l"i) $!gxh HCOLON digit+ >{msg.MaxForwards=0} @MaxForwards
	\| ("Min-Expires"i \| "l"i) $!gxh HCOLON digit+ >{msg.MinExpires=0} @MinExpires
	;

	# Header Parsing
	#
	# The header machine parses a single header and then jumps to itself to
	# loop. When the final CRLF is observed, we then break out of the Ragel
	# parser and let the Go code handle payload extraction.
	#
	# Parsing standard header names is a prefix trie search in generated code.
	# Lookahead to set the mark on the header name. In order to support
	# extended headers, we'll use $!gxh to jump to the xheader machine when an
	# unrecognized character is detected in the header name.
	#
	# An independent machine has been created for generic header values, so
	# that it doesn't need to be duplicated for each leaf in the prefix
	# trie. When the value machine has finished reading a value, it'll be
	# parsed and stored based on whether the value/addr pointers are set.
	#
	# Header values can span multiple lines. Lookahead is used in the LWS
	# definition to check for whitespace at the start of the next line upon
	# encountering a line feed character, in order to determine if a line
	# continuation is present.
	#
	# In order to concatenate across machines, we use lookahead in conjunction
	# with the left-guarded concatenation operator. This pattern works is
	# defined as follows: `foo <: any @hold @goto_bar`.
	#
	# Header names are case insensitive. Each recognized header is assigned to
	# a specific field in the Msg data structure. Extended headers are stored
	# to a linked list data structure with the casing preserved. This is so
	# messages can be reproduced with roughly the same appearance. It is the
	# responsibility of the person using Msg.Headers to do case-insensitive
	# string comparisons.
	value := hval <: CRLF @value @goto_header;
	xheader := token %name HCOLON <: any @{value=nil} @hold @goto_value;
	sheader = cheader <: CRLF @goto_header
	\| aname $!gxh HCOLON <: any @{value=nil} @hold @goto_addr
	\| sname $!gxh HCOLON <: any @hold @goto_value
	\| ("Via"i \| "v"i) $!gxh HCOLON <: any @ViaNew @hold @goto_via
	\| ("Content-Type"i \| "c"i) $!gxh HCOLON <: any @hold @goto_ctype;
	header := CRLF @break
	\| tokenc @mark @hold sheader;

	# Start Line Parsing
	#
	# The Request and Response definitions are very straightforward, and the
	# main machine is the union of the two. Once the line feed character has
	# been observed, we then jump to the header machine.
	# SIP Message Parsing
	Method = token >mark %Method;
	SIPVersionNo = digit+ @VersionMajor "." digit+ @VersionMinor;
	RequestURI = ^SP+ >mark %RequestURI;
	StatusCode = ( digit @StatusCode ) {3};
	ReasonPhrase = reasonmc+ >start %ReasonPhrase;
	SIPVersion = "SIP/" SIPVersionNo;
	Request = Method SP RequestURI SP SIPVersion CRLF @goto_header;
	Response = SIPVersion SP StatusCode SP ReasonPhrase CRLF @goto_header;
	Message = Request \| Response;

	}%%