Created
December 14, 2011 17:17
-
-
Save falms/1477516 to your computer and use it in GitHub Desktop.
webarchive to mhtml converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@echo off
rem Runs webarchive2mhtml.php (looked up in this script's own directory) once
rem for every file path passed on the command line.
rem Usage: <this script> file1 [file2 ...]
setlocal
rem %~dp0 is the drive and directory of this script, including the trailing
rem backslash. The quoted SET form keeps characters such as & or ) in that
rem path from being interpreted by the command processor.
set "SCRIPTDIR=%~dp0"
:exec
rem Stop as soon as there is no argument left to process.
if "%~1"=="" goto end
echo "%~1"
php "%SCRIPTDIR%webarchive2mhtml.php" "%~1"
rem Drop the processed argument so the next one becomes %1.
shift
goto exec
:end
endlocal
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Converts a webarchive (property list) file into an MHTML (.mht) file.
 *
 * Usage: php webarchive2mhtml.php <file>
 *
 * The result is written next to the input file, using the input's base name
 * with a ".mht" extension. Status messages go to standard output; the exit
 * status is non-zero when the conversion could not be completed.
 */
require_once(dirname(__FILE__).'/cfpropertylist/CFPropertyList.php');

// An existing input file is mandatory.
if(!(isset($argv[1]) && is_file($argv[1]))){
    fwrite(STDERR, 'Usage: php '.basename(__FILE__).' <file.webarchive>'.PHP_EOL);
    exit(1);
}

$fn = realpath($argv[1]);
$fninf = pathinfo($fn);

try{
    $plist = new CFPropertyList($fn);
    $d = $plist->toArray();
}catch(Exception $e){
    // Catch the base class rather than only DOMException so that any parse
    // or read failure is reported the same way instead of ending the script
    // with an uncaught exception.
    // NOTE(review): CFPropertyList is believed to throw its own exception
    // types for I/O and binary-format errors - confirm against the bundled
    // library version.
    print $fninf['basename']." is not valid webarchive(plist) file.".PHP_EOL;
    exit(1);
}

// The main resource and the fields read from it below must all be present.
if(!is_array($d)
    || !isset($d['WebMainResource'])
    || !isset($d['WebMainResource']['WebResourceMIMEType'])
    || !isset($d['WebMainResource']['WebResourceURL'])
    || !isset($d['WebMainResource']['WebResourceData'])){
    print $fninf['basename']." is not valid webarchive file.".PHP_EOL;
    exit(1);
}

print 'Converting "'.$fninf['basename'].'" to "'.$fninf['filename'].'.mht" ... ';

$dm = $d['WebMainResource'];

// addPart() reads the boundary from the global scope, so this variable has
// to stay a global named $boundary.
$boundary = '----=_NextPart_'.date("YmdHis").'.'.mt_rand(10000000, 99999999);

// Top-level MIME headers followed by the conventional preamble line.
$m = array();
$m[] = 'Date: '.date(DATE_RFC2822);
$m[] = 'MIME-Version: 1.0';
$m[] = 'Content-Type: multipart/related;';
$m[] = ' type="'.$dm['WebResourceMIMEType'].'";';
$m[] = ' boundary="'.$boundary.'"';
$m[] = '';
$m[] = 'This is a multi-part message in MIME format.';
$m[] = '';

// The main resource goes first; its charset is appended only when known.
$mainType = $dm['WebResourceMIMEType'];
if(isset($dm['WebResourceTextEncodingName'])){
    $mainType .= '; charset="'.$dm['WebResourceTextEncodingName'].'"';
}
addPart($m, $mainType, $dm['WebResourceURL'], $dm['WebResourceData']);

// An archive without subresources has no WebSubresources key at all.
if(isset($d['WebSubresources']) && is_array($d['WebSubresources'])){
    foreach($d['WebSubresources'] as $ds){
        addPart($m, $ds['WebResourceMIMEType'], $ds['WebResourceURL'], $ds['WebResourceData']);
    }
}

$m[] = '--'.$boundary.'--';

// MIME lines are CRLF-terminated on every platform; chunk_split() inside
// addPart() already emits CRLF, so joining with PHP_EOL would mix line
// endings on hosts where PHP_EOL is "\n".
$outPath = $fninf['dirname'].'/'.$fninf['filename'].'.mht';
if(file_put_contents($outPath, implode("\r\n", $m)) === false){
    print "Failed to write ".$outPath.PHP_EOL;
    exit(1);
}
print "Done.".PHP_EOL;
exit;
/**
 * Appends one MIME body part (boundary line, headers, blank line, encoded
 * body, blank line) to a list of output lines.
 *
 * Parts whose content type starts with "text/" (compared case-insensitively,
 * as media types are case-insensitive) are encoded as quoted-printable; all
 * other parts are base64-encoded and split into CRLF-terminated lines.
 *
 * @param array       $target    Output lines; new lines are appended in place.
 * @param string      $cType     Value of the Content-Type header.
 * @param string      $cLocation Value of the Content-Location header.
 * @param string      $cData     Raw body bytes of the part.
 * @param string|null $boundary  Multipart boundary. When omitted, the global
 *                               variable $boundary is used.
 * @return void
 */
function addPart(&$target, $cType, $cLocation, $cData, $boundary = null){
    if($boundary === null){
        $boundary = $GLOBALS['boundary'];
    }
    $target[] = '--'.$boundary;

    if(strncasecmp($cType, 'text/', 5) === 0){
        $cTransEnc = 'quoted-printable';
        // Encode the bytes exactly as stored. Converting through mbstring
        // with an auto-detected source encoding can alter or reject data
        // whose charset is not detected, which would contradict the charset
        // declared in the Content-Type header.
        $cData = quoted_printable_encode($cData);
    }else{
        $cTransEnc = 'base64';
        $cData = chunk_split(base64_encode($cData));
    }

    $target[] = 'Content-Type: '.$cType;
    $target[] = 'Content-Transfer-Encoding: '.$cTransEnc;
    $target[] = 'Content-Location: '.$cLocation;
    $target[] = '';
    $target[] = $cData;
    $target[] = '';
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment