Skip to content

Instantly share code, notes, and snippets.

@kupp1
Last active August 15, 2018 18:32
Show Gist options
  • Save kupp1/937ff2c237c201874280f03984452231 to your computer and use it in GitHub Desktop.
Save kupp1/937ff2c237c201874280f03984452231 to your computer and use it in GitHub Desktop.
A regular expression that parses IRC protocol messages as specified in RFC 2812
# original work by Michael F. Lamb. License: GPLv3.
# added optional tilde before !user by kupp
# Block regex matching one complete IRC message line.
# Capture groups, in order:
#   1. whole prefix, without the leading ':'
#   2. nick (or server name)
#   3. user (an optional leading '~' is matched but not captured)
#   4. host
#   5. command
#   6. leading arguments as a single string, each one preceded by a space
#   7. last argument (an optional leading ':' is not captured)
# NOTE(review): the user class [^@]* excludes only '@', so it can also
# match spaces -- confirm this is intended.
RFC2812Matcher = ///
^ # We'll match the whole line. Start.
# Optional prefix and the space that separates it
# from the next thing. Prefix can be a servername,
# or nick[[!user]@host]
(?::( # This whole set is optional but if it's
# here it begins with : and ends with space
([^@!\ ]*) # nick
(?: # then, optionally user/host
(?: # but user is optional if host is given
!~?([^@]*) # !(~)user
)? # (user was optional)
@([^\ ]*) # @host
)? # (host was optional)
)\ )? # ":nick!user@host " ends
([^\ ]+) # IRC command (required)
# Optional args, max 15, space separated. Last arg is
# the only one that may contain inner spaces. More than
# 15 words means remainder of words are part of 15th arg.
# Last arg may be indicated by a colon prefix instead.
# Pull the leading and last args out separately; we have
# to split the former on spaces.
(
(?:
\ [^:\ ][^\ ]* # space, no colon, non-space characters
){0,14} # repeated up to 14 times
) # captured in one reference
(?:\ :?(.*))? # the rest, does not capture colon.
$ # EOL
///
# Parse one raw IRC line into its components using RFC2812Matcher.
#
# Returns `{invalid: line}` when the line does not match. Otherwise returns
# an object with:
#   raw      - the line exactly as given
#   prefix   - complete prefix (without the leading ':')
#   nick     - nick or server name
#   username - user part of the prefix
#   hostname - host part of the prefix
#   command  - the IRC command
#   params   - array of arguments; the last argument, when present and
#              non-empty, is appended as a single element
parse = (line) ->
  res = RFC2812Matcher.exec line
  # A single-line `if` needs `then`; the postfix guard avoids that pitfall.
  return {invalid: line} unless res

  # res[6] is either '' or starts with ' ', so the first piece produced by
  # split is always empty and is dropped.
  leading = res[6].split(' ').slice(1)
  # NOTE(review): an empty last argument (e.g. "TOPIC #chan :") is dropped
  # here because '' is falsy -- confirm whether callers rely on that.
  last = if res[7] then [res[7]] else []

  raw: line           # Whole line
  prefix: res[1]      # complete prefix
  nick: res[2]        # or servername
  username: res[3]
  hostname: res[4]
  command: res[5]
  params: leading.concat last
import re
# Compiled verbose-mode (re.X) pattern matching one complete IRC message line.
# Capture groups, in order:
#   1. whole prefix, without the leading ':'
#   2. nick (or server name)
#   3. user (an optional leading '~' is matched but not captured)
#   4. host
#   5. command
#   6. leading arguments as a single string, each one preceded by a space
#   7. last argument (an optional leading ':' is not captured)
# Groups 1-4 and 7 are None when the corresponding part is absent; group 6
# is the empty string when there are no leading arguments.
# NOTE(review): the user class [^@]* excludes only '@', so it can also
# match spaces -- confirm this is intended.
RFC2812 = re.compile(r"""
^ # We'll match the whole line. Start.
# Optional prefix and the space that separates it
# from the next thing. Prefix can be a servername,
# or nick[[!user]@host]
(?::( # This whole set is optional but if it's
# here it begins with : and ends with space
([^@!\ ]*) # nick
(?: # then, optionally user/host
(?: # but user is optional if host is given
!~?([^@]*) # !user
)? # (user was optional)
@([^\ ]*) # @host
)? # (host was optional)
)\ )? # ":nick!user@host " ends
([^\ ]+) # IRC command (required)
# Optional args, max 15, space separated. Last arg is
# the only one that may contain inner spaces. More than
# 15 words means remainder of words are part of 15th arg.
# Last arg may be indicated by a colon prefix instead.
# Pull the leading and last args out separately; we have
# to split the former on spaces.
(
(?:
\ [^:\ ][^\ ]* # space, no colon, non-space characters
){0,14} # repeated up to 14 times
) # captured in one reference
(?:\ :?(.*))? # the rest, does not capture colon.
$ # EOL""", re.X)
def parse(data: str) -> dict:
    """Parse one raw IRC protocol line into its components.

    Args:
        data: A single IRC message line.

    Returns:
        ``{'invalid': data}`` when the line does not match ``RFC2812``
        (the same convention as the CoffeeScript parser, which returns
        ``invalid: line``). Otherwise a dict with the keys:

        * ``raw`` -- the line exactly as given
        * ``ident`` -- complete prefix without the leading ``:``, or None
        * ``nick`` -- nick or server name, or None
        * ``username`` -- user part of the prefix, or None
        * ``host`` -- host part of the prefix, or None
        * ``command`` -- the IRC command
        * ``params`` -- leading arguments as one space-separated string
          (``''`` when there are none)
        * ``trailing`` -- the last argument without its ``:`` marker, or
          None when the line has none
    """
    match = RFC2812.search(data)
    if match is None:
        # search() returns None on a non-matching line (e.g. an empty
        # string); report it instead of failing on ``None.groups()``.
        return {'invalid': data}

    prefix, nick, username, host, command, leading, trailing = match.groups()
    return {
        'raw': data,
        'ident': prefix,
        'nick': nick,
        'username': username,
        'host': host,
        'command': command,
        # ``leading`` is '' or starts with the separating space; drop it.
        'params': leading[1:],
        # Previously discarded: this is where e.g. PRIVMSG text ends up.
        'trailing': trailing,
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment