Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@falms
Created December 14, 2011 17:17
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save falms/1477516 to your computer and use it in GitHub Desktop.
Save falms/1477516 to your computer and use it in GitHub Desktop.
webarchive to mhtml converter
@echo off
rem Runs webarchive2mhtml.php (looked up in the same directory as this script)
rem once for every argument given on the command line.
setlocal
rem %~dp0 expands to this script's drive and directory, ending in a backslash.
rem The whole assignment is quoted so that a directory name containing & or
rem other command-separator characters is stored intact instead of being
rem split into a second command.
set "SCRIPTDIR=%~dp0"
:exec
rem Stop once the argument list is exhausted.
if "%~1"=="" goto end
echo "%~1"
php "%SCRIPTDIR%webarchive2mhtml.php" "%~1"
rem Discard the argument just processed; the next one becomes %1.
shift
goto exec
:end
<?php
require_once(dirname(__FILE__).'/cfpropertylist/CFPropertyList.php');
// Command-line entry point: reads the property-list file named by the first
// argument, and writes its WebMainResource and WebSubresources entries as a
// multipart/related MHTML document ("<name>.mht") next to the input file.

// Nothing to do without an existing input file.
if(!(isset($argv[1]) && is_file($argv[1]))){
    exit;
}

$fn = realpath($argv[1]);
$fninf = pathinfo($fn);

try{
    $plist = new CFPropertyList($fn);
    $d = $plist->toArray();
}catch(Exception $e){
    // Catch every exception type rather than only DOMException, so that any
    // failure raised while reading or parsing the file is reported here
    // instead of terminating the script with an uncaught exception.
    print $fninf['basename']." is not valid webarchive(plist) file.".PHP_EOL;
    exit(1);
}

// The parsed plist must be an array that carries the main resource.
if(!is_array($d) || !isset($d['WebMainResource'])){
    print $fninf['basename']." is not valid webarchive file.".PHP_EOL;
    exit(1);
}

$outName = $fninf['filename'].'.mht';
print 'Converting "'.$fninf['basename'].'" to "'.$outName.'" ... ';

$dm = $d['WebMainResource'];

// Kept as a global named $boundary: addPart() reads it via $GLOBALS.
$boundary = '----=_NextPart_'.date("YmdHis").'.'.mt_rand(10000000, 99999999);

// Top-level message headers followed by the MIME preamble.
$m = array();
$m[] = 'Date: '.date(DATE_RFC2822);
$m[] = 'MIME-Version: 1.0';
$m[] = 'Content-Type: multipart/related;';
$m[] = ' type="'.$dm['WebResourceMIMEType'].'";';
$m[] = ' boundary="'.$boundary.'"';
$m[] = '';
$m[] = 'This is a multi-part message in MIME format.';
$m[] = '';

// Main resource; append the charset parameter only when the archive has one.
$mainType = $dm['WebResourceMIMEType'];
if(isset($dm['WebResourceTextEncodingName'])){
    $mainType .= '; charset="'.$dm['WebResourceTextEncodingName'].'"';
}
addPart($m, $mainType, $dm['WebResourceURL'], $dm['WebResourceData']);

// WebSubresources may be missing (e.g. a page without images or stylesheets);
// treat that as an empty list instead of iterating over an undefined index.
$subresources = (isset($d['WebSubresources']) && is_array($d['WebSubresources']))
    ? $d['WebSubresources']
    : array();
foreach($subresources as $ds){
    addPart($m, $ds['WebResourceMIMEType'], $ds['WebResourceURL'], $ds['WebResourceData']);
}

// Closing boundary.
$m[] = '--'.$boundary.'--';

// MIME lines are CRLF-terminated. PHP_EOL is a bare LF on non-Windows hosts,
// and chunk_split() in addPart() already emits CRLF, so joining with an
// explicit CRLF keeps the line endings consistent on every platform.
$written = file_put_contents($fninf['dirname'].'/'.$outName, implode("\r\n", $m));
if($written === false){
    // Do not claim success when the output file could not be written.
    print "Failed.".PHP_EOL;
    exit(1);
}
print "Done.".PHP_EOL;
exit;
/**
 * Appends one MIME body part (boundary line, headers, encoded body) to the
 * list of output lines.
 *
 * Parts whose content type starts with "text/" (compared case-insensitively,
 * as MIME type names are not case-sensitive) are quoted-printable encoded;
 * everything else is base64 encoded and wrapped by chunk_split().
 *
 * @param array       $target    List of output lines; the part is appended in place.
 * @param string      $cType     Content-Type header value, parameters included.
 * @param string      $cLocation Content-Location header value.
 * @param string      $cData     Raw body of the part.
 * @param string|null $boundary  Multipart boundary without the leading "--".
 *                               When omitted, the global $boundary is used, which
 *                               keeps existing four-argument calls working.
 * @return void
 */
function addPart(&$target, $cType, $cLocation, $cData, $boundary = null){
    if($boundary === null){
        $boundary = $GLOBALS['boundary'];
    }
    $target[] = '--'.$boundary;

    if(strncasecmp($cType, 'text/', 5) === 0){
        $cTransEnc = 'quoted-printable';
        // quoted_printable_encode() is the dedicated transfer-encoding
        // function: it encodes the bytes as given, without the charset
        // auto-detection that mb_convert_encoding(..., 'auto') performs, and
        // without relying on mbstring's deprecated "Quoted-Printable"
        // pseudo-encoding.
        $cData = quoted_printable_encode($cData);
    }else{
        $cTransEnc = 'base64';
        $cData = chunk_split(base64_encode($cData));
    }

    $target[] = 'Content-Type: '.$cType;
    $target[] = 'Content-Transfer-Encoding: '.$cTransEnc;
    $target[] = 'Content-Location: '.$cLocation;
    // Blank line separates the part headers from the body.
    $target[] = '';
    $target[] = $cData;
    $target[] = '';
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment