Skip to content

Instantly share code, notes, and snippets.

@zwjzxh520
Last active February 1, 2022 13:59
Show Gist options
  • Save zwjzxh520/4444e276db0db5423dfc3dd0e437408d to your computer and use it in GitHub Desktop.
Save zwjzxh520/4444e276db0db5423dfc3dd0e437408d to your computer and use it in GitHub Desktop.
php写的nginx 日志分析
<?php
/**
 * Parser for nginx access logs and error logs.
 *
 * Access logs are decoded according to a `log_format` string as written in the
 * nginx configuration; error logs are decoded according to the layout
 * "yyyy/mm/dd hh:mm:ss [level] pid#tid: *cid message".
 */
class NginxLog {
    /** Line separator used when splitting log text into lines. */
    protected static $br = "\n";

    /**
     * Parses access-log text according to an nginx log format.
     *
     * Format example:
     * "$remote_addr" $request_time - $remote_user d [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for"
     *
     * @param string|array $log    Raw log text (lines separated by "\n") or an array of log lines
     * @param string       $format The access-log format
     * @return array One entry per non-empty line, keyed by the line's index in the input.
     *               Each entry maps a variable name (including the leading "$") to its text.
     */
    public function analysisAccess($log, $format)
    {
        $logVars = $this->parseFormat($format);
        $lines = is_array($log) ? $log : explode(self::$br, $log);
        $logArr = [];
        foreach ($lines as $lineNum => $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }
            $lineLen = strlen($line);
            // Current offset inside the log line.
            $linePos = 0;
            foreach ($logVars as $var => $info) {
                // Skip the literal characters that precede this variable.
                $linePos += $info['prevLen'];
                if ($linePos >= $lineLen) {
                    // The line is shorter than the format expects.
                    $logArr[$lineNum][$var] = '';
                    continue;
                }
                // The value ends at the first occurrence of the character that
                // follows the variable in the format (if there is one).
                $varLogEnd = $info['endChar'] !== ''
                    ? strpos($line, $info['endChar'], $linePos)
                    : false;
                if ($varLogEnd === false) {
                    // No terminator: the value runs to the end of the line.
                    $logArr[$lineNum][$var] = (string) substr($line, $linePos);
                    $linePos = $lineLen;
                } else {
                    $logArr[$lineNum][$var] = (string) substr($line, $linePos, $varLogEnd - $linePos);
                    // The next variable is located relative to the terminator.
                    $linePos = $varLogEnd;
                }
            }
        }
        return $logArr;
    }

    /**
     * Parses nginx error-log lines.
     *
     * Some entries span several lines (for example the stack trace of a PHP
     * fatal error). A line containing fewer than two double quotes is treated
     * as incomplete and buffered in a function-static variable, which persists
     * between calls; it is prepended to the next line that completes the entry.
     * A line that starts with a timestamp always begins a new entry, so
     * buffered text that never completed is discarded at that point.
     *
     * @param array   $logLines Error-log lines
     * @param boolean $reset    Whether to clear the buffer left over from a previous call
     * @return array List of parsed entries (see parseLineError() for the shape)
     */
    public function analysisError($logLines, $reset = false)
    {
        $result = [];
        $minQuotes = 2;
        static $mergeLog = '';
        if ($reset) {
            $mergeLog = '';
        }
        foreach ($logLines as $log) {
            $startsEntry = $this->isDateTimeFormat((string) substr($log, 0, 19));
            if (substr_count($log, '"') >= $minQuotes) {
                if ($mergeLog !== '' && !$startsEntry) {
                    $log = $mergeLog.$log;
                }
                $mergeLog = '';
                $logResult = $this->parseLineError($log);
                if (!empty($logResult)) {
                    $result[] = $logResult;
                }
            } elseif ($startsEntry) {
                // A new entry begins here; whatever was pending never completed.
                $mergeLog = $log."\n";
            } else {
                $mergeLog .= $log."\n";
            }
        }
        return $result;
    }

    /**
     * Parses one complete error-log entry.
     *
     * A complete entry starts with a timestamp and contains a quoted
     * description; anything else yields an empty array.
     * Result:
     * [
     *   'datetime' => Unix timestamp of the error
     *   'type'     => nginx error level (text between the square brackets)
     *   'fd'       => the two tokens after the level, e.g. "10535#0: *916177977"
     *   'msg'      => text between those tokens and the first double quote
     *   'desc'     => text between the first and the second double quote
     *   'http'     => array of request details, see parseErrorHTTP()
     *   'phperr'   => PHP error type, or '' when this is not a PHP error
     * ]
     *
     * @param string $log One complete error-log entry
     * @return array Parsed entry, or [] when the entry cannot be parsed
     */
    protected function parseLineError($log)
    {
        $datetimeLen = 19; // length of "yyyy/mm/dd hh:mm:ss"
        $metaPos = $datetimeLen + 1; // where the meta section starts
        $datetime = (string) substr($log, 0, $datetimeLen);
        if (!$this->isDateTimeFormat($datetime) || strlen($log) < $metaPos) {
            return [];
        }
        $metaEndPos = strpos($log, '"', $metaPos);
        if ($metaEndPos === false) {
            return [];
        }
        $errorDescEndPos = strpos($log, '"', $metaEndPos + 1);
        if ($errorDescEndPos === false) {
            return [];
        }
        $metaStr = trim(substr($log, $metaPos, $metaEndPos - $metaPos));
        $metaArr = explode(' ', $metaStr);

        $result = [];
        $result['datetime'] = strtotime($datetime);
        // Error level, with the surrounding square brackets removed.
        $result['type'] = (string) substr($metaArr[0], 1, -1);
        // Process-related identifiers.
        $result['fd'] = implode(' ', array_slice($metaArr, 1, 2));
        // Short message preceding the quoted description.
        $result['msg'] = rtrim(implode(' ', array_slice($metaArr, 3)), ':');
        // Quoted description.
        $result['desc'] = (string) substr($log, $metaEndPos + 1, $errorDescEndPos - $metaEndPos - 1);
        // Request details following the description.
        $result['http'] = $this->parseErrorHTTP(trim((string) substr($log, $errorDescEndPos + 1)));
        // PHP error type, if the description is a PHP message.
        $result['phperr'] = $this->getPHPErrorType($result['desc']);
        return $result;
    }

    /**
     * Extracts the PHP error type from a description such as
     * "PHP message: PHP Fatal error: ...".
     *
     * @param string $str
     * @return string The error type (e.g. "Fatal error"), or '' when $str is not a PHP error
     */
    protected function getPHPErrorType($str)
    {
        $prefix = 'PHP message: PHP ';
        $prefixLen = strlen($prefix);
        if (strncmp($str, $prefix, $prefixLen) !== 0) {
            return '';
        }
        // The type runs up to the next colon.
        $colon = strpos($str, ':', $prefixLen);
        if ($colon === false) {
            return '';
        }
        return (string) substr($str, $prefixLen, $colon - $prefixLen);
    }

    /**
     * Parses the request details of an error entry: normally everything from
     * "client:" to the end of the entry.
     *
     * Result (which keys are present depends on the entry; keys are taken
     * verbatim from the log):
     * [
     *   0          => text between the description and "client:"
     *   'client'   => client address
     *   'server'   => server name
     *   'method'   => request method
     *   'uri'      => request URI
     *   'httpver'  => HTTP version
     *   'upstream' => upstream address
     *   'host'     => Host header
     *   'referrer' => referring URL
     * ]
     * When "client:" is absent the whole string is returned under key 0.
     *
     * @param string $http
     * @return array
     */
    protected function parseErrorHTTP($http)
    {
        $result = [];
        $split = strpos($http, 'client:');
        if ($split === false) {
            $result[0] = $http;
            return $result;
        }
        $result[0] = rtrim((string) substr($http, 0, $split), ', ');
        // Split only on commas that introduce the next "key: " pair, so that
        // commas inside a value (e.g. a URL query string) stay intact.
        $pairs = preg_split('/,\s*(?=\w+: )/', substr($http, $split));
        if ($pairs === false) {
            $pairs = [];
        }
        foreach ($pairs as $pair) {
            $pos = strpos($pair, ': ');
            if ($pos === false) {
                continue;
            }
            $key = trim(substr($pair, 0, $pos));
            $val = trim((string) substr($pair, $pos + 2), '"');
            if ('request' === $key) {
                $result = array_merge($result, $this->parseRequest($val));
            } else {
                $result[$key] = $val;
            }
        }
        return $result;
    }

    /**
     * Parses a request line such as "GET http:/www.baidu.com HTTP/1.1".
     *
     * Result:
     * [
     *   'method'  => request method
     *   'uri'     => request URI
     *   'httpver' => HTTP version
     * ]
     * A request without any space is returned as the uri; a request with a
     * single space is treated as "method uri" with an empty version.
     *
     * @param string $request Example: "GET http:/www.baidu.com HTTP/1.1"
     * @return array
     */
    protected function parseRequest($request)
    {
        $split = ' ';
        $firstPos = strpos($request, $split);
        if ($firstPos === false) {
            return ['method' => '', 'uri' => $request, 'httpver' => ''];
        }
        $lastPos = strrpos($request, $split);
        if ($lastPos === $firstPos) {
            return [
                'method' => substr($request, 0, $firstPos),
                'uri' => trim((string) substr($request, $firstPos)),
                'httpver' => '',
            ];
        }
        return [
            'method' => substr($request, 0, $firstPos),
            'uri' => trim(substr($request, $firstPos, $lastPos - $firstPos)),
            'httpver' => trim(substr($request, $lastPos)),
        ];
    }

    /**
     * Checks whether a string is exactly a timestamp of the form
     * yyyy/mm/dd hh:mm:ss (e.g. 2016/09/06 00:32:19).
     *
     * @param string $str String to check
     * @return boolean
     */
    protected function isDateTimeFormat($str)
    {
        return preg_match('/\A\d{4}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}\z/', (string) $str) === 1;
    }

    /**
     * Analyses an access-log format: extracts the variable names and records
     * how each variable's value can be located inside a log line.
     *
     * For every variable (key, including the leading "$") the result holds:
     *   'endChar' => the character that directly follows the variable in the
     *                format ('' for a variable at the very end); its first
     *                occurrence in the log line marks the end of the value
     *   'prevLen' => number of characters to skip from the end of the previous
     *                value to the start of this one
     *
     * @param string $format The log format from the nginx configuration
     * @return array
     */
    public function parseFormat($format)
    {
        $varCharList = 'abcdefghijklmnopqrstuvwxyz_$ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
        $return = [];
        $formatLen = strlen($format);
        $logVar = '';
        $prevStrLen = 0;
        for ($i = 0; $i < $formatLen; $i++) {
            $char = $format[$i];
            // A variable starts at "$" and continues while name characters follow.
            $inVariable = strpos($varCharList, $char) !== false && ($logVar !== '' || $char === '$');
            if ($inVariable) {
                $logVar .= $char;
                continue;
            }
            if ($logVar !== '') {
                $return[$logVar]['endChar'] = $char;
                $return[$logVar]['prevLen'] = $prevStrLen;
                // The terminator itself is the first literal before the next variable.
                $prevStrLen = 1;
                $logVar = '';
            } else {
                $prevStrLen++;
            }
        }
        if ($logVar !== '') {
            $return[$logVar]['endChar'] = '';
            $return[$logVar]['prevLen'] = $prevStrLen;
        }
        return $return;
    }

    /**
     * Reads a large file block by block, yielding whole lines only.
     *
     * Set only one of $pos and $startLine, otherwise the result may not be what
     * you expect. When $pos is given, reading starts right after the nearest
     * line break found at or before $pos (or at the start of the file when
     * there is none), so that only complete lines are read.
     *
     * Every yielded string contains one or more complete lines separated by
     * "\n". A line is never split across two yields, however long it is.
     * Nothing is yielded when the file cannot be opened.
     *
     * @param string  $filepath  Absolute path of the file
     * @param integer $pos       Byte offset to start from
     * @param integer $startLine 1-based number of the first line to return
     * @param integer $blockLen  Number of bytes to read per block
     * @return \Generator
     */
    public function readBigFile($filepath, $pos = 0, $startLine = 1, $blockLen = 20480)
    {
        $handle = fopen($filepath, 'rb');
        if (!$handle) {
            return;
        }
        $br = self::$br;
        $brLen = strlen($br);
        $blockLen = max(1, (int) $blockLen);
        // Number of leading lines still to be dropped.
        $skip = max(0, (int) $startLine - 1);
        // Unterminated tail of the previous block.
        $carry = '';
        try {
            if ($pos > 0) {
                // Walk backwards until a line break is found; the file pointer
                // then sits on the first byte after it.
                $found = false;
                for ($offset = $pos; $offset >= 0; $offset--) {
                    fseek($handle, $offset);
                    $c = fread($handle, 1);
                    if ($c === "\n" || $c === "\r") {
                        $found = true;
                        break;
                    }
                }
                if (!$found) {
                    rewind($handle);
                }
            }
            while (!feof($handle)) {
                $data = fread($handle, $blockLen);
                if ($data === false || $data === '') {
                    break;
                }
                $buffer = $carry.$data;
                $carry = '';
                $atEnd = feof($handle);
                if ($atEnd) {
                    // Last block: everything that is left belongs to the output.
                    $chunk = $buffer;
                } else {
                    $lastBrPos = strrpos($buffer, $br);
                    if ($lastBrPos === false) {
                        // Not a single complete line yet; keep accumulating.
                        $carry = $buffer;
                        continue;
                    }
                    $carry = (string) substr($buffer, $lastBrPos + $brLen);
                    $chunk = (string) substr($buffer, 0, $lastBrPos);
                }
                if ($skip > 0) {
                    list($rest, $skip) = $this->skipLeadingLines($atEnd ? $chunk : $chunk.$br, $skip);
                    if ($rest === '') {
                        continue;
                    }
                    $chunk = $atEnd ? $rest : (string) substr($rest, 0, -$brLen);
                }
                yield $chunk;
            }
            // Reached when the end of the file was hit without being flagged on
            // the last non-empty read: emit the tail that is still pending.
            if ($carry !== '') {
                if ($skip > 0) {
                    list($carry, $skip) = $this->skipLeadingLines($carry, $skip);
                }
                if ($carry !== '') {
                    yield $carry;
                }
            }
        } finally {
            fclose($handle);
        }
    }

    /**
     * Drops up to $count leading lines from $text. A non-empty unterminated
     * tail counts as one line.
     *
     * @param string  $text  Lines separated by the class line separator
     * @param integer $count Number of lines to drop
     * @return array [remaining text ('' when nothing is left), number of lines still to drop]
     */
    protected function skipLeadingLines($text, $count)
    {
        $brLen = strlen(self::$br);
        $offset = 0;
        while ($count > 0) {
            $next = strpos($text, self::$br, $offset);
            if ($next === false) {
                if ($offset < strlen($text)) {
                    $count--;
                }
                return ['', $count];
            }
            $offset = $next + $brLen;
            $count--;
        }
        return [(string) substr($text, $offset), 0];
    }
}
// ---------------------------------------------------------------------------
// Demo script: exercises NginxLog on inline samples (and on a real error-log
// file when one is available).
// ---------------------------------------------------------------------------

// Access-log format matching the sample lines below.
$format = '"$remote_addr" $request_time - $remote_user d [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for"';
// Nowdoc, so that "$" or backslashes in pasted log lines are never interpreted.
$log = <<<'LOG'
"192.168.110.1" 0.040 - - d [05/Aug/2016:08:22:54 +0800] "GET /uc_server/data/avatar/006/40/73/36_real_avatar_small.jpg HTTP/1.0" 404 6664 "-" "-" "-"
"36.149.107.72" 3.016 - - d [05/Aug/2016:08:22:54 +0800] "GET /cardniu/api/api_splashinterface.php?udid=deviceId-866968026335845-generate-cardniu&systemName=android+OS&systemVersion=4.4.4&productName=Cardniu&productVersion=4&position=KNSQTZXQB&positionList=1&chanelSys=shequ HTTP/1.1" 200 5 "http://www.baidu.com/cardniu/detail.php?tid=552221&utm_source=552221&utm_medium=ribao&utm_campaign=xiaoxi" "Mozilla/5.0 (Linux; Android 4.4.4; A31 Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36 feideeAndroid-V4 MymoneySms7.3.2-oppo" "-"
"192.168.110.1" 0.040 - - d [05/Aug/2016:08:22:54 +0800] "GET //uc_server/data/avatar/005/86/04/56_real_avatar_small.jpg HTTP/1.0" 404 6664 "-" "-" "-"
LOG;

// Access-log test.
$startTime = microtime(true);
var_export((new NginxLog)->analysisAccess($log, $format));
echo "\n", microtime(true) - $startTime, "\n";

// Error-log test. The log file is used when it is readable; otherwise the
// inline sample below is parsed instead, so the demo runs without any fixture.
$errorLogFile = __DIR__.'/../testdata/www.baidu.com_error_2016-09-05.log';
$sampleErrorLog = <<<'LOG'
2016/09/05 14:52:52 [error] 17890#0: *1424957037 FastCGI sent in stderr: "PHP message: PHP Fatal error: Uncaught exception 'DbException' with message 'Duplicate entry '240319164' for key 'sid'' in /var/www/html/bbs/source/class/db/db_driver_mysql.php:218
Stack trace:
#0 /var/www/html/bbs/source/class/db/db_driver_mysql.php(151): db_driver_mysql->halt('Duplicate entry...', 1062, 'UPDATE pre_com...')
#1 /var/www/html/bbs/source/class/db/db_driver_mysql_slave.php(62): db_driver_mysql->query('UPDATE pre_com...', false, false)
#2 /var/www/html/bbs/source/class/discuz/discuz_database.php(179): db_driver_mysql_slave->query('UPDATE pre_com...', false, false)
#3 /var/www/html/bbs/source/class/discuz/discuz_database.php(102): discuz_database::query('UPDATE pre_com...', '')
#4 /var/www/html/bbs/source/class/discuz/discuz_table.php(52): discuz_database::update('common_member', Array, '`uid`='10929277...', false, false)
#5 /var/www/html/bbs/m/register_by_email.php(72): discuz_table->update(10929277, Array)
#6 /var/www/html/bbs/m/function/ssj_function.php(788): register_by_ssjuser('', '" while reading response header from upstream, client: 192.168.241.97, server: www.baidu.com, request: "POST /m/api/credit.php HTTP/1.1", upstream: "fastcgi://127.0.0.1:9000", host: "www.baidu.com"
2016/09/06 05:11:54 [error] 10527#0: *915961403 open() "/var/www/html/cardniu/thread-544114" failed (2: No such file or directory), client: 180.153.205.253, server: www.baidu.com, request: "GET /thread-544114?10000skip=true-1-1.html HTTP/1.1", host: "www.baidu.com", referrer: "http://www.baidu.com/cardniu/detail.php?tid=544114?10000skip=true"
2016/09/06 06:22:15 [error] 10535#0: *916177977 FastCGI sent in stderr: "Primary script unknown" while reading response header from upstream, client: 111.127.121.50, server: www.baidu.com, request: "GET /cardniu/api/home.php?mod=spacecp&ac=usergroup&do=expiry HTTP/1.1", upstream: "fastcgi://127.0.0.1:9000", host: "www.baidu.com", referrer: "http://www.baidu.com/cardniu/kn_daily.php?stamptime=2016-09-05&udid=deviceId-868979027115530-generate-cardniu&bankcode=SPD,CMB,CEB"
LOG;

$startTime = microtime(true);
$nginxLog = new NginxLog;
$logNum = 0;
$phperrs = [];
$chunks = is_readable($errorLogFile)
    ? $nginxLog->readBigFile($errorLogFile)
    : [$sampleErrorLog];
foreach ($chunks as $chunk) {
    $logArr = explode("\n", trim($chunk));
    $logNum += count($logArr);
    // Keep only the entries that were identified as PHP errors.
    foreach ($nginxLog->analysisError($logArr) as $entry) {
        if (!empty($entry['phperr'])) {
            $phperrs[] = $entry;
        }
    }
}
echo 'finish. cost time:'.(microtime(true) - $startTime)."\n";
echo '日志数量:'.$logNum."\n";
echo 'php错误日志数量:'.count($phperrs)."\n";
var_export($phperrs);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment