Created
June 3, 2011 01:06
-
-
Save ktomk/1005669 to your computer and use it in GitHub Desktop.
Iterator to Extract Message Headers from Bounced Messages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
* BouncedHeaders.php | |
* | |
* Copyright 2011 mot <@fsfe.org> | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation; either version 2 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program; if not, write to the Free Software | |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | |
* MA 02110-1301, USA. | |
*/ | |
/** | |
* Iterator that extracts message headers from plain | |
* text that is normally the body of a bounce-email-message. | |
* | |
* Supports folded header field bodies. | |
* | |
* Iterator value is the header-field-value or complete line if not | |
* a header. | |
* Iterator key is the header-field-name or empty if not a header. | |
* | |
* To filter non-header lines, ignore zero-length keys. | |
* | |
* As there can be duplicate header field names, there can be | |
* duplicate keys. | |
* | |
* If initialized with retobj set to true, value will be an object | |
* containing detailed information and key will be a numerical index. | |
*/ | |
class BouncedHeaders implements Iterator
{
    /**
     * @var string message body
     */
    private $body;

    /**
     * @var string line ending delimiter, "\r\n" by RFC (2)822
     */
    private $crlf;

    /**
     * @var bool return object instead of string
     */
    private $retobj;

    /**
     * @var string header field name of the current element, '' if it is not a header
     */
    private $yykey = '';

    /**
     * @var int current parser position (byte offset into the body)
     */
    private $yypos = 0;

    /**
     * @var int iterator index (number of elements already passed)
     */
    private $yycount = 0;

    /**
     * @var int next parser position (byte offset of the following element)
     */
    private $yynext = 0;

    /**
     * @var int line pointer (zero-based physical line of the current element)
     */
    private $yyline = 0;

    /**
     * @var int number of physical lines the current element spans
     */
    private $yylines = 0;

    /**
     * @var int|null position the cached element was parsed at, null if nothing is cached
     */
    private $yyparsed = null;

    /**
     * @var string|stdClass|null cached value of the current element
     */
    private $yyvalue = null;

    /**
     * @param string $body   message body
     * @param bool   $retobj (optional) yield detail objects with numerical keys
     *                       instead of string values with field names as keys
     * @param string $crlf   (optional) line ending
     */
    public function __construct($body, $retobj = false, $crlf = "\r\n")
    {
        $this->body = (string) $body;
        $this->crlf = (string) $crlf;
        $this->retobj = (bool) $retobj;
    }

    /**
     * get field-name from a line
     *
     * A line counts as a header when it starts with a field name, optionally
     * followed by blanks, then a colon and at least one more character.
     *
     * @param string $line
     * @return string field name or zero-length string if not found.
     * @throws Exception if the regular expression engine fails on the line.
     */
    public static function fieldName($line)
    {
        // Header Name per RFC: 33 and 126, inclusive, except 58 colon.
        // The upper bound is \x7E (126); \x7F (DEL) is not a printable character.
        $r = preg_match('(^([\x21-\x39\x3B-\x7E]+)([\t ]*:)(.+)$)', $line, $matches);
        if ($r === false) {
            throw new Exception('preg_match failed to process line.');
        }
        return $r ? $matches[1] : '';
    }

    /**
     * Read the element that starts at the current parser position.
     *
     * Reads one physical line; if it is a header, any following lines that
     * start with a tab or space are appended to it (unfolding, the line
     * delimiters are dropped). Updates yykey, yynext and yylines.
     *
     * @return string the (unfolded) line without its line ending
     */
    private function getCurrentLine()
    {
        $line = '';
        $from = $this->yypos;
        $crlflen = strlen($this->crlf);
        $key = '';
        $this->yylines = 0;
        do {
            // An empty delimiter can never be found: treat the rest of the
            // body as a single line instead of searching for '' (which would
            // not advance the position on PHP 8).
            $next = $crlflen ? strpos($this->body, $this->crlf, $from) : false;
            if ($next === false) {
                $next = strlen($this->body);
            }
            $line .= substr($this->body, $from, $next - $from);
            $from = $next + $crlflen;
            $this->yylines++;
        } while (
            // the field name is detected on the first physical line only
            (isset($key[0]) || strlen($key = self::fieldName($line)))
            // continue while the following line is a folded continuation
            && isset($this->body[$from])
            && strspn($this->body[$from], "\t ")
        );
        $this->yykey = $key;
        $this->yynext = $from;
        return $line;
    }

    /**
     * Parse the element at the current position once and cache the result
     * on this instance, so current(), key() and next() may be called in any
     * order and any number of times.
     */
    private function fetch()
    {
        if ($this->yyparsed === $this->yypos) {
            return;
        }
        $this->yyparsed = $this->yypos;
        if (!isset($this->body[$this->yypos])) {
            // past the end of the body: nothing to parse, do not move
            $this->yykey = '';
            $this->yynext = $this->yypos;
            $this->yylines = 0;
            $this->yyvalue = null;
            return;
        }
        $value = $this->getCurrentLine();
        if (isset($this->yykey[0])) {
            // header: the value is everything after the first colon
            $value = substr(strstr($value, ':'), 1);
        }
        if ($this->retobj) {
            $c = new stdClass;
            $c->isfield = isset($this->yykey[0]);
            $c->name = $this->yykey;
            $c->value = $value;
            $c->offset = $this->yypos;
            $c->line = $this->yyline;
            $c->lines = $this->yylines;
            $value = $c;
        }
        $this->yyvalue = $value;
    }

    /**
     * @return string|stdClass|null header field value or the complete line if
     *         not a header; a detail object in retobj mode; null past the end
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        $this->fetch();
        return $this->yyvalue;
    }

    /**
     * @return string|int header field name ('' if not a header), or the
     *         numerical element index in retobj mode
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        $this->fetch();
        return $this->retobj ? $this->yycount : $this->yykey;
    }

    /**
     * Advance to the next element.
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        // make sure the extent of the current element is known even if
        // current() was never called for it (e.g. iterator_count())
        $this->fetch();
        $this->yypos = $this->yynext;
        $this->yyline += $this->yylines;
        $this->yycount++;
    }

    /**
     * Restart at the beginning of the body and drop the cached element.
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->yypos = $this->yynext = $this->yyline = 0;
        $this->yycount = 0;
        $this->yylines = 0;
        $this->yykey = '';
        $this->yyparsed = null;
        $this->yyvalue = null;
    }

    /**
     * @return bool true while the parser position is inside the body
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return isset($this->body[$this->yypos]);
    }
}
/** | |
* FilterIterator to remove normal lines and optionally | |
* only return header with the name(s) given in the | |
* $filter argument. | |
*/ | |
class BouncedHeadersFiltered extends FilterIterator
{
    /**
     * @var string[] header field names to let through; empty means all headers
     */
    private $userFilter;

    /**
     * @param string          $body   message body
     * @param string[]|string $filter (optional) header field name(s) to keep;
     *                                a single name may be given as a string.
     *                                Names are compared case-sensitively.
     */
    public function __construct($body, $filter = array())
    {
        parent::__construct(new BouncedHeaders($body));
        // Normalise to a list of strings so a single name works and the
        // strict comparison in accept() behaves predictably.
        $this->userFilter = array_map('strval', (array) $filter);
    }

    /**
     * Accept header elements only, restricted to the configured names if any.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function accept()
    {
        $inner = $this->getInnerIterator();
        $key = $inner->key();
        if (is_int($key)) {
            // numerical key: the field name is carried by the element object
            $key = $inner->current()->name;
        }
        if (!strlen($key)) {
            // not a header line
            return false;
        }
        if (!count($this->userFilter)) {
            // no name restriction: every header passes
            return true;
        }
        // strict comparison, so numeric-looking names ("10" vs "1e1") do not
        // match each other
        return in_array($key, $this->userFilter, true);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment