Last active
November 21, 2016 15:26
-
-
Save requinix/68f2810b8a9824239c23 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// This code/file/output has no license, though attribution would be appreciated. | |
// Output at http://pastebin.com/2ZLehM5N | |
// When the script is requested as "?source", show its own syntax-highlighted
// source instead of the generated regex, then stop.
// QUERY_STRING is only populated for web requests, so confirm it exists before
// reading it; otherwise running the script from the command line raises an
// undefined-index warning.
if (isset($_SERVER["QUERY_STRING"]) && $_SERVER["QUERY_STRING"] === "source") {
    highlight_file(__FILE__);
    return;
}
/*
 * Core ABNF rules that the RFC 5322 grammar below builds on (ALPHA, CR, CRLF, ...).
 *
 * Every rule in this file is a PCRE fragment held in a PHP string. Backslashes are
 * doubled so that the escape reaches the regex engine instead of being interpreted
 * by PHP. Spaces inside the fragments are for readability only: all whitespace is
 * stripped when the final regex is assembled, which is why a literal space or tab
 * is always written as \x20 or \t.
 */
$ALPHA = "[A-Za-z]";
$CR = "\\r";
$CRLF = "\\r\\n";
$DIGIT = "[0-9]";
$DQUOTE = "\\x22";
$LF = "\\n";
$VCHAR = "[\\x21-\\x7E]";
$WSP = "[\\x20\\t]";

/*
 * RFC 5322: Internet Message Format <http://tools.ietf.org/html/rfc5322>
 *
 * $ADDR_SPEC is the form of an email address.
 *
 * The comment/ccontent rules are mutually recursive. PCRE handles this with a named
 * subpattern: (?P<COMMENT>...) defines it once and (?P>COMMENT) calls it. A name may
 * only be defined once per pattern, so while the grammar is being built $COMMENT is
 * just the literal placeholder text '$COMMENT'. After $ADDR_SPEC is complete, the
 * first placeholder is swapped for the real definition ($_COMMENT) and every other
 * placeholder for the recursive call.
 *
 * Note: this validates the syntax of an address only. SMTP has additional rules of
 * its own (fully-qualified domain names, maximum lengths, ...), so an address that
 * is accepted here may still be rejected by SMTP, and SMTP agents may take shortcuts
 * and wrongly accept or reject addresses.
 */

// 4.1 Miscellaneous Obsolete Tokens
$OBS_NO_WS_CTL = "[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]";
$OBS_CTEXT = "$OBS_NO_WS_CTL";
$OBS_QP = "\\\\(\\0| $OBS_NO_WS_CTL | $LF | $CR )";
$OBS_QTEXT = "$OBS_NO_WS_CTL";

// 3.2.1 Quoted characters
$QUOTED_PAIR = "(\\\\( $VCHAR | $WSP )| $OBS_QP )";

// 4.2 Obsolete Folding White Space
$OBS_FWS = "( $WSP )+( $CRLF ( $WSP )+)*";

// 3.2.2 Folding White Space and Comments
$FWS = "((( $WSP )* $CRLF )?( $WSP )+| $OBS_FWS )";
$CTEXT = "([\\x21-\\x27\\x2A-\\x5B\\x5D-\\x7E]| $OBS_CTEXT )";
$CCONTENT = "( $CTEXT | $QUOTED_PAIR | (?P>COMMENT) )";
// The one real definition of the named COMMENT subpattern; spliced in during assembly.
$_COMMENT = "(?P<COMMENT>\\((( $FWS )? $CCONTENT )*( $FWS )?\\))";
// Single-quoted on purpose: this is the literal placeholder text, not an interpolation.
$COMMENT = '$COMMENT';
$CFWS = "((( $FWS )? $COMMENT )+( $FWS )? | $FWS )";

// 3.2.3 Atom
$ATEXT = "( $ALPHA | $DIGIT |[!#\$%&'*+\\-/=?^_`{|}~])";
$ATOM = "( $CFWS )?( $ATEXT )+( $CFWS )?";
$DOT_ATOM_TEXT = "( $ATEXT )+(\\.( $ATEXT )+)*";
$DOT_ATOM = "( $CFWS )? $DOT_ATOM_TEXT ( $CFWS )?";

// 3.2.4 Quoted Strings
$QTEXT = "([\\x21\\x23-\\x5B\\x5D-\\x7E] | $OBS_QTEXT )";
$QCONTENT = "( $QTEXT | $QUOTED_PAIR )";
$QUOTED_STRING = "( $CFWS )? $DQUOTE (( $FWS )? $QCONTENT )*( $FWS )? $DQUOTE ( $CFWS )?";

// 3.2.5 Miscellaneous Tokens
$WORD = "( $ATOM | $QUOTED_STRING )";

// 4.4 Obsolete Addressing
$OBS_LOCAL_PART = "$WORD (\\. $WORD )*";
$OBS_DOMAIN = "$ATOM (\\. $ATOM )*";
$OBS_DTEXT = "( $OBS_NO_WS_CTL | $QUOTED_PAIR )";

// 3.4.1 Addr-Spec Specification
$LOCAL_PART = "( $DOT_ATOM | $QUOTED_STRING | $OBS_LOCAL_PART )";
$DTEXT = "([\\x21-\\x5A\\x5E-\\x7E]| $OBS_DTEXT )";
$DOMAIN_LITERAL = "( $CFWS )? \\[(( $FWS )? $DTEXT )*( $FWS )?\\]( $CFWS )?";
$DOMAIN = "( $DOT_ATOM | $DOMAIN_LITERAL | $OBS_DOMAIN )";
$ADDR_SPEC = "$LOCAL_PART @ $DOMAIN";

// Resolve the recursion placeholders: the first '$COMMENT' becomes the named
// subpattern definition, every remaining one becomes a call to that subpattern.
// The definition itself contains no placeholder, so the second step cannot touch it.
$first = strpos($ADDR_SPEC, $COMMENT);
$ADDR_SPEC = substr_replace($ADDR_SPEC, $_COMMENT, $first, strlen($COMMENT));
$ADDR_SPEC = str_replace($COMMENT, "(?P>COMMENT)", $ADDR_SPEC);

// Strip the readability whitespace, escape slashes that would collide with the "/"
// delimiter, and anchor the pattern at both ends. The D modifier makes "$" match
// only at the very end of the subject; without it "$" also matches just before a
// trailing newline, so "user@example.com\n" would be accepted as a valid address.
$regex = '/^' . str_replace("/", "\\/", preg_replace('/\s+/', "", $ADDR_SPEC)) . '$/D';
?>
<pre style="word-wrap:break-word;">
<?= htmlentities($regex) ?>
</pre>
<?php
// Link back to this same script with "?source" so the rules can be inspected.
// The file name is URL-encoded so that characters such as spaces, quotes, "#" or "?"
// cannot break the link or the surrounding attribute.
?>
<a href="<?= rawurlencode(basename(__FILE__)) ?>?source">Source rules</a>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment