Last active
October 31, 2024 15:33
-
-
Save pv2b/0df7b5badef90e43d2a6f505fcdb0954 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is a terrible, imperfect implementation of RFC1342 (since superseded by RFC 2047) in KQL, used to
// convert subject headers as logged by postfix into readable text. This should never have been written.
// Bask in its terribleness.
//
// Input:  filteredSyslogs (defined outside this snippet), read through its SyslogMessage column.
// Output: one row per QueueId, with the reassembled, decoded header in the Subject column.
// NOTE(review): QueueId is not extracted by the parse below, so it is presumably already a column of
// filteredSyslogs -- confirm against its definition.
let subjects = filteredSyslogs
    | where SyslogMessage has "info: header Subject: "
    // Pull the raw header text out of the postfix/cleanup log line; the trailing pattern strips the
    // " from host[address]" suffix that follows the header value.
    | parse kind=regex SyslogMessage with "postfix[/]cleanup[[]" * ": info: header Subject: " RawSubject " from [a-z0-9-.]+[[][0-9a-f:.]+[]]" *
    // This regex extracts rfc1342 "words" from the subject line, as well as "non-words". This is needed because
    // there's no way in KQL to grab the strings between matches. This also allows for the case where an RFC1342
    // header isn't properly terminated... and also the case where there's some extra junk after the last ?=.
    // The encoding token is matched in either case ([BbQq]) because RFC 2047 declares it case-independent;
    // accepting only upper case left "=?utf-8?q?...?=" style words undecoded.
    | extend Match=extract_all(@"((?:=\?([^?]{1,40})\?([BbQq])\?([^?]*)\??=?[^\s]*\s*)|.[^=]*)", RawSubject)
    | mv-apply Match on (
        // Each element of Match holds the capture groups of one part:
        //   [0] the whole part, [1] charset, [2] encoding letter, [3] encoded payload.
        // For plain-text parts the inner groups are empty, so those parts end up in the RawSubjectPart fallback.
        project RawSubjectPart = tostring(Match[0]), Charset = tostring(Match[1]), Encoding = tostring(Match[2]), EncodedData = tostring(Match[3])
        // Normalise both encodings to base64 so a single decode step can handle them.
        | extend Base64 = iff(
            // Case-insensitive comparison so that a lower-case "q" takes the quoted-printable path too.
            Encoding =~ "Q",
            base64_encode_tostring(
                // We abuse url_decode to decode quoted-printable strings...
                url_decode(
                    // ... by performing this tomfoolery to convert quoted-printable strings into URL encoding.
                    replace_strings(
                        EncodedData,
                        dynamic(["+", "_", "%" , "="]),
                        dynamic(["%2b", "+", "%25", "%"])
                    )
                )
            ),
            // This part truncates the base64 string so it's a multiple of 4 bytes long.
            // This deals with the scenario where the headers are truncated in the middle of a
            // base64 sequence, which would otherwise cause base64_decode_* to return nothing
            // at all.
            extract(@"([A-Za-z0-9+/=]{4})*", 0, EncodedData)
        )
        | extend Decoded =
            // This works because base64_decode_tostring() specifies UTF-8.
            iff(Charset=~"UTF-8", base64_decode_tostring(Base64),
            // This "kinda" works because unicode codepoints 128-255 map exactly to ISO-8859-1's corresponding bytes,
            // and because ISO-8859-15 and Windows-1252 are "close enough" to ISO-8859-1 that most things decode fine.
            iff(Charset=~"Windows-1252" or Charset=~"ISO-8859-1" or Charset=~"ISO-8859-15", unicode_codepoints_to_string(base64_decode_toarray(Base64)),
            // Anything else (plain text, unknown charsets) is passed through untouched.
            RawSubjectPart))
    )
    // Stitch the decoded parts of each message back together into one subject line.
    | summarize Subject=strcat_array(make_list(Decoded), "") by QueueId;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment