Last active
November 11, 2019 03:46
-
-
Save xtetsuji/b080e1f5551d17242f6415aba8a00239 to your computer and use it in GitHub Desktop.
parse mail headers by zero-length assertion regular expression (negative lookahead)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;

# Demo: parse mail headers with one regular expression whose terminator is a
# zero-length assertion (negative lookahead). A header value ends at the first
# line break that is NOT followed by a space or tab, i.e. not followed by a
# folded continuation line.

# Sort hash keys so the dumped output is stable from run to run.
$Data::Dumper::Sortkeys = 1;

# Continuation lines must start with whitespace; that is what marks them as
# part of the previous header's value.
my $header_string = <<'END_HEADER';
Delivered-To: xtetsuji@example.jp
Received: by 2002:ff68:12bc:0:0:0:0:0 with SMTP id 2f8a1000492ce44c1b7c;
        Fri, 8 Nov 2019 15:07:34 -0800 (PST)
X-Received: by 2002:a63:6581:: with SMTP id c451320e8476820844296b80587f9f3a;
        Fri, 08 Nov 2019 15:07:34 -0800 (PST)
END_HEADER

print $header_string;
print "-" x 60, "\n";

my %headers = parse_headers($header_string);

print Dumper(\%headers);

# parse_headers($text)
#
# Splits a block of header text into a flat (name => value) list suitable for
# assigning to a hash.
#
#   $text   - string holding zero or more "Name: value" headers; values may be
#             folded over several lines (continuation lines start with a space
#             or tab). LF and CRLF line endings are both accepted.
#   returns - list of name/value pairs; an empty list for undef or for text
#             that contains no headers.
#
# When the same header name occurs more than once, the last occurrence wins
# because the pairs are collapsed through a hash. Keeping every occurrence
# would need a different data structure.
sub parse_headers {
    my ($text) = @_;
    return if !defined $text;

    my %value_of = $text =~ m{
        ^                       # start of a line (m modifier)
        ([\w-]+)                # header name
        : [ \t]*                # colon, then blanks only: \s would also eat
                                #   the newline that ends an empty value
        (.*?)                   # value, possibly spanning lines (s modifier)
        (?:
            \r?\n (?![ \t])     # ends at a line break not followed by a
                                #   continuation line ...
          | \z                  # ... or at end of input, so a final header
                                #   without a trailing newline is kept
        )
    }gmsx;

    # values() yields aliases, so edits here reach the hash itself.
    for my $value (values %value_of) {
        $value =~ s/\r?\n[ \t]+/ /g;   # unfold every continuation, not just the first
        $value =~ s/\A[ \t]+//;        # value that began on a continuation line
    }

    return %value_of;
}
__END__ | |
出力は以下のようになる | |
Delivered-To: xtetsuji@example.jp | |
Received: by 2002:ff68:12bc:0:0:0:0:0 with SMTP id 2f8a1000492ce44c1b7c; | |
Fri, 8 Nov 2019 15:07:34 -0800 (PST) | |
X-Received: by 2002:a63:6581:: with SMTP id c451320e8476820844296b80587f9f3a; | |
Fri, 08 Nov 2019 15:07:34 -0800 (PST) | |
------------------------------------------------------------ | |
$VAR1 = { | |
'Delivered-To' => 'xtetsuji@example.jp', | |
'Received' => 'by 2002:ff68:12bc:0:0:0:0:0 with SMTP id 2f8a1000492ce44c1b7c; Fri, 8 Nov 2019 15:07:34 -0800 (PST)', | |
'X-Received' => 'by 2002:a63:6581:: with SMTP id c451320e8476820844296b80587f9f3a; Fri, 08 Nov 2019 15:07:34 -0800 (PST)' | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment