Last active
September 9, 2023 13:40
-
-
Save longxiao7/6a50183d386e270db04092bc9d78a0f6 to your computer and use it in GitHub Desktop.
PHP切割QQ群聊天记录文件生成 ZIP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
# Split a QQ chat-history export file into paged HTML files and image files
# stored inside a ZIP archive.
#
# Usage: php <this script> <input file>
# The archive is always written to ./test.zip.

# The row/image patterns in mht_process() use lazy quantifiers over large
# buffers, so make sure PCRE is allowed enough backtracking steps.
ini_set('pcre.backtrack_limit', '1000000');

# State shared with mht_process() through `global`:
#   $is_table_end - becomes true once the closing HTML of the message table was seen
#   $page         - running page counter for messages/NNNNNNNN.html
$is_table_end = false;
$page = 0;
# Unprocessed tail carried over from one read to the next
$contents = '';
$output_zip = './test.zip';

# Validate the input file (a missing argument is treated like a missing file)
if (!isset($argv[1]) || !file_exists($argv[1])) {
    echo 'There isn\'t have this file.';
    exit(1);
}

# Create the archive and its directory skeleton
make_output_target($output_zip);

$handle = fopen($argv[1], "rb");
if (false === $handle) {
    echo 'open input file failed';
    exit(1);
}

# Process the input piece by piece: append the next read to whatever the
# previous pass could not consume yet, then let mht_process() consume as much
# as it can and hand back the remainder.
do {
    # 124416 is the read size used by the original author; no special meaning
    # is evident from the code.
    $chunk = fread($handle, 124416);
    if (false === $chunk) {
        # Read error: stop instead of looping on a broken stream
        break;
    }
    $contents .= $chunk;
    $contents = mht_process($contents, $output_zip);
} while (!feof($handle));

fclose($handle);
# Main data processing
/**
 * Consume as much of the buffered input as possible and store the result in the ZIP.
 *
 * Works in two phases, switched by the global $is_table_end:
 *  - message phase: every complete <tr>...</tr> row is collected, rows are grouped
 *    200 per page and written as messages/NNNNNNNN.html;
 *  - image phase (after the closing "</table></body></html>" was seen): every
 *    complete base64 image part is decoded and written as images/X/Y/<name>.dat,
 *    where X and Y are the 2nd and 3rd characters of the captured name.
 *
 * Globals used: $is_table_end (read/write), $page (read/write, number of the
 * next page file to write).
 *
 * @param string $contents   Unprocessed input: leftover of the previous call plus newly read data.
 * @param string $output_zip Path of the ZIP archive to append to.
 *
 * @return string The part of $contents that could not be processed yet and must
 *                be prepended to the next read.
 */
function mht_process($contents, $output_zip)
{
    global $is_table_end;
    global $page;

    $zip = new ZipArchive;
    if ($zip->open($output_zip, ZipArchive::CREATE) !== true) {
        echo 'create image failed';
        exit(1);
    }

    # Message phase: runs until the end of the message table has been seen
    if (false === $is_table_end) {
        $html_head = '<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><title>QQ Message</title><style type="text/css">body{font-size:12px; line-height:22px; margin:2px;}td{font-size:12px; line-height:22px;}</style></head><body><table width=100% cellspacing=0>';
        $html_foot = '</table></body></html>';

        # Once the closing markup is in the buffer, only images follow
        if (false !== strpos($contents, $html_foot)) {
            $is_table_end = true;
        }

        $found = preg_match_all('|<tr.*?\</tr\>|ims', $contents, $matches, PREG_OFFSET_CAPTURE);
        if ($found) {
            $chunks = array_chunk($matches[0], 200);
            foreach ($chunks as $key => $rows) {
                # Point image references at the images/X/Y/ layout used inside the archive
                $html_rows = array_map(
                    static function ($row) {
                        return preg_replace('|<IMG src="{(\S)(\S)(\S+).dat|ims', '<IMG src="../images/$1/$2/{$1$2$3.dat', $row);
                    },
                    array_column($rows, 0)
                );

                $number = $page + $key;
                $navigation = '<td> <H1><a href="./' . sprintf("%08d", $number - 1) . '.html">Prev page</a></h1> <H1><a href="./' . sprintf("%08d", $number + 1) . '.html">Next page</a></h1></td>';
                $zip->addFromString(
                    'messages/' . sprintf("%08d", $number) . '.html',
                    $html_head . implode('', $html_rows) . $navigation . $html_foot
                );
            }

            # Advance by the number of pages written. Advancing by the last chunk
            # index (count - 1) would make the next call reuse, and thereby
            # overwrite, the last page written here.
            $page += count($chunks);

            # Continue right after the last complete row ($rows is the last chunk)
            $last_row = end($rows);
            $contents = (string) substr($contents, $last_row[1] + strlen($last_row[0]));
        } elseif (false === $is_table_end) {
            # No complete row yet: keep an unfinished row (or a possibly split
            # closing marker) so it can be completed by the next read.
            $contents = mht_keep_pending($contents, '<tr', strlen($html_foot) - 1);
        }
        # When the table has ended without any row in the buffer, the buffer is
        # left untouched for the image phase below.
    }

    # Image phase
    if (true === $is_table_end) {
        $found = preg_match_all('|Content-Type:image.*?:base64.*?Content-Location:(.*?)\.dat(.*?)(?:------=_)|ims', $contents, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
        if ($found) {
            foreach ($matches as $match) {
                $name = $match[1][0];
                $path = 'images/' . substr($name, 1, 1) . '/' . substr($name, 2, 1) . '/' . $name . '.dat';
                # Store the decoded image inside the archive
                $zip->addFromString($path, base64_decode(trim($match[2][0])));
            }

            # Continue after the last complete image so it is not decoded and
            # stored again by the next call.
            $last_image = end($matches);
            $contents = (string) substr($contents, $last_image[0][1] + strlen($last_image[0][0]));
        } else {
            # No complete image yet: keep an unfinished image part (it may be
            # larger than one read) instead of throwing its beginning away.
            $contents = mht_keep_pending($contents, 'Content-Type:image', strlen('Content-Type:image') - 1);
        }
    }

    if ($zip->close() !== true) {
        echo 'write archive failed';
        exit(1);
    }

    # Hand the remainder back; the caller prepends it to the next read
    return $contents;
}

/**
 * Reduce a buffer that produced no complete match to the part that can still
 * become one.
 *
 * @param string $buffer      Buffer without a complete match.
 * @param string $marker      Text every match starts with (searched case-insensitively).
 * @param int    $tail_length Number of trailing bytes to keep when $marker is absent,
 *                            so a marker split across two reads is not lost.
 *
 * @return string $buffer from the first $marker on, or its last $tail_length bytes.
 */
function mht_keep_pending($buffer, $marker, $tail_length)
{
    $start = stripos($buffer, $marker);
    if (false !== $start) {
        return (string) substr($buffer, $start);
    }
    if ($tail_length <= 0) {
        return '';
    }
    if (strlen($buffer) <= $tail_length) {
        return $buffer;
    }
    return substr($buffer, -$tail_length);
}
# Create the output target
/**
 * Create (or open) the ZIP archive and add its directory skeleton:
 * images/, images/X/ and images/X/Y/ for every X and Y in 0-9 and A-Z,
 * and messages/.
 *
 * Prints a message and exits with status 1 when the archive cannot be opened
 * or written.
 *
 * @param string $output Path of the ZIP archive.
 *
 * @return int Always 0.
 */
function make_output_target($output='./test.zip')
{
    $zip = new ZipArchive;
    if ($zip->open($output, ZipArchive::CREATE) !== true) {
        echo 'create images directory failed';
        exit(1);
    }

    $zip->addEmptyDir('images');

    # Two nesting levels, one directory per digit / upper-case letter
    $symbols = str_split('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ');
    foreach ($symbols as $first) {
        $base_path = 'images/' . $first;
        $zip->addEmptyDir($base_path);
        foreach ($symbols as $second) {
            $zip->addEmptyDir($base_path . '/' . $second);
        }
    }

    $zip->addEmptyDir('messages');

    # close() is where the archive is actually written; report a failure here
    # instead of letting it go unnoticed.
    if ($zip->close() !== true) {
        echo 'create images directory failed';
        exit(1);
    }

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment