Skip to content

Instantly share code, notes, and snippets.

@pv2b
Last active October 31, 2024 15:33
Show Gist options
  • Save pv2b/0df7b5badef90e43d2a6f505fcdb0954 to your computer and use it in GitHub Desktop.
Save pv2b/0df7b5badef90e43d2a6f505fcdb0954 to your computer and use it in GitHub Desktop.
// This is a terrible, imperfect implementation of RFC 2047 (which obsoletes RFC 1342) "encoded-words" in KQL,
// used to convert Subject headers as logged by postfix into readable text.
// This should never have been written. Bask in its terribleness.
//
// Pipeline overview:
//   1. Keep only the syslog lines that carry a logged Subject header and cut the raw header value out of them.
//   2. Split the raw value into a sequence of parts: encoded-words (=?charset?encoding?data?=) and plain-text runs.
//   3. Normalise every encoded part to base64 (Q-encoded data is first converted to bytes via URL decoding).
//   4. Decode the base64 according to the charset, falling back to the raw text for anything unsupported.
//   5. Glue the decoded parts back together, one Subject per QueueId.
//
// NOTE(review): QueueId is not extracted anywhere in this statement (the parse below skips over it with `*`),
// so it is presumably already a column of filteredSyslogs - confirm against its definition.
let subjects = filteredSyslogs
| where SyslogMessage has "info: header Subject: "
| parse kind=regex SyslogMessage with "postfix[/]cleanup[[]" * ": info: header Subject: " RawSubject " from [a-z0-9-.]+[[][0-9a-f:.]+[]]" *
// This regex extracts encoded "words" from the subject line, as well as "non-words". This is needed because
// there's no way in KQL to grab the strings between matches. This also allows for the case where an encoded-word
// isn't properly terminated... and also the case where there's some extra junk after the last ?=.
//
// Each match yields an array of the four capture groups:
//   [0] the whole part, [1] charset, [2] encoding, [3] encoded data
// For plain-text parts (second alternative: one character followed by everything up to the next '=')
// groups 1-3 are empty.
//
// The encoding token is matched as [BbQq] rather than [BQ]: RFC 2047 makes the encoding case-independent,
// so "=?utf-8?b?...?=" and "=?utf-8?q?...?=" must be decoded too instead of being passed through raw.
| extend Match = extract_all(@"((?:=\?([^?]{1,40})\?([BbQq])\?([^?]*)\??=?[^\s]*\s*)|.[^=]*)", RawSubject)
| mv-apply Match on (
    project
        RawSubjectPart = tostring(Match[0]),
        Charset = tostring(Match[1]),
        Encoding = tostring(Match[2]),
        EncodedData = tostring(Match[3])
    | extend Base64 = iff(
        // Case-insensitive comparison, to go with the case-insensitive encoding token above.
        Encoding =~ "Q",
        base64_encode_tostring(
            // We abuse url_decode to decode quoted-printable strings...
            url_decode(
                // ... by performing this tomfoolery to convert quoted-printable strings into URL encoding:
                // literal "+" and "%" are escaped, "_" (Q-encoding for space) becomes "+", and the
                // quoted-printable escape character "=" becomes the URL escape character "%".
                replace_strings(
                    EncodedData,
                    dynamic(["+", "_", "%", "="]),
                    dynamic(["%2b", "+", "%25", "%"])
                )
            )
        ),
        // This part truncates the base64 string so it's a multiple of 4 bytes long.
        // This deals with the scenario where the headers are truncated in the middle of a
        // base64 sequence, which would otherwise cause base64_decode_* to return nothing
        // at all.
        extract(@"([A-Za-z0-9+/=]{4})*", 0, EncodedData)
    )
    | extend Decoded =
        // This works because base64_decode_tostring() specifies UTF-8.
        iff(Charset =~ "UTF-8", base64_decode_tostring(Base64),
        // This "kinda" works because unicode codepoints 128-255 map exactly to ISO-8859-1's corresponding bytes,
        // and because ISO-8859-15 and Windows-1252 are "close enough" to ISO-8859-1 that most things decode fine.
        iff(Charset =~ "Windows-1252" or Charset =~ "ISO-8859-1" or Charset =~ "ISO-8859-15",
            unicode_codepoints_to_string(base64_decode_toarray(Base64)),
        // Plain-text parts (empty charset) and unsupported charsets are passed through undecoded.
        RawSubjectPart))
)
// NOTE(review): this relies on make_list() seeing the parts in their original order; the order of make_list()
// output is only defined for sorted input - confirm that parts cannot be reordered here.
| summarize Subject = strcat_array(make_list(Decoded), "") by QueueId;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment