Skip to content

Instantly share code, notes, and snippets.

@vibbow
Created January 22, 2014 23:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save vibbow/8569991 to your computer and use it in GitHub Desktop.
Save vibbow/8569991 to your computer and use it in GitHub Desktop.
PHP 全文搜索脚本
<?php
// Root directory whose files are searched (recursively, via get_file_list()).
define('DB_SOURCE', 'd:\\data');
// Maximum number of bytes read from a file per chunk (4 MiB).
define('CACHE_LIMIT', 4194304);
// Stop searching after this many matching lines have been printed.
define('RESULT_LIMIT', 1000);
// Soft time budget for the whole search, in seconds.
define('TIME_LIMIT', 600);

$begin = microtime(true);
// The hard PHP limit is set above the soft budget so the script can still
// print its own "time out" message before being killed.
set_time_limit(TIME_LIMIT + 100);
// Disable output buffering so results stream to the browser. Guarded because
// ob_end_flush() raises a notice when no buffer is active.
if (ob_get_level() > 0) {
    ob_end_flush();
}
echo <<< EOF
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK" />
<title>Full text search</title>
</head><body>
<form method="get" action="">
<input type="text" name="keyword" />
<input type="submit" />
</form>
EOF;
flush();

// Only accept a scalar string; "keyword[]=x" would otherwise reach trim() as an array.
$keyword = (isset($_REQUEST['keyword']) && is_string($_REQUEST['keyword']))
    ? trim($_REQUEST['keyword'])
    : '';
// Strict comparison instead of empty(): the keyword "0" is a legitimate search term.
if ($keyword === '') exit('</body></html>');

$filelist = array();
get_file_list(DB_SOURCE . '\\*');
$count = 0;
$timed_out = false;
echo 'Search ' . search_html_escape($keyword) . ' in ' . count($filelist) . " leak databases ...<br />\r\n";
flush();

foreach ($filelist as $filepath) {
    $fp = fopen($filepath, 'rb');
    if (!$fp) continue;
    $basename = basename($filepath);
    $filesize = filesize($filepath);
    if ($filesize === false) {
        // Size unknown: the chunk loop below could not terminate reliably.
        fclose($fp);
        continue;
    }

    $fp_start_pos = 0;
    // "<" rather than "!==" so a file that shrinks or grows mid-search cannot
    // cause an endless loop.
    while ($fp_start_pos < $filesize) {
        fseek($fp, $fp_start_pos);
        $content = fread($fp, CACHE_LIMIT);
        if ($content === false || $content === '') break;
        $content_length = strlen($content);

        // Unless this chunk reaches the end of the file, cut it back to the last
        // complete line so that no line is split across two chunks.
        if ($fp_start_pos + $content_length < $filesize) {
            $last_newline = strrpos($content, "\n");
            if ($last_newline !== false) {
                $content_length = $last_newline + 1;
                $content = substr($content, 0, $content_length);
            }
            // No newline at all (a single line longer than CACHE_LIMIT): search the
            // whole chunk as-is instead of advancing one byte per read. A keyword
            // straddling such a chunk boundary is not detected.
        }
        $fp_start_pos += $content_length;

        $keyword_pos = 0;
        while (($keyword_pos = strpos($content, $keyword, $keyword_pos)) !== false) {
            // A negative offset makes strrpos() search backwards starting at
            // $keyword_pos, i.e. it finds the newline that precedes the match.
            $line_start = strrpos($content, "\n", $keyword_pos - $content_length);
            $line_start = ($line_start === false) ? 0 : $line_start + 1;
            $line_end = strpos($content, "\n", $keyword_pos);
            if ($line_end === false) $line_end = $content_length;

            $line = trim(substr($content, $line_start, $line_end - $line_start));
            echo search_html_escape($basename) . ' | ' . search_html_escape($line) . "<br />\r\n";
            flush();

            // Continue after this line so each matching line is reported once.
            $keyword_pos = $line_end;
            $count++;
            if ($count >= RESULT_LIMIT) break;
        }

        if ($count >= RESULT_LIMIT) break;
        // Check the time budget per chunk as well, so one huge file cannot
        // overrun it.
        if ((microtime(true) - $begin) >= TIME_LIMIT) {
            $timed_out = true;
            break;
        }
    }
    fclose($fp);

    if ($count >= RESULT_LIMIT) break;
    if ($timed_out || (microtime(true) - $begin) >= TIME_LIMIT) {
        $timed_out = true;
        break;
    }
}

if ($count >= RESULT_LIMIT)
    echo "Too many results, give up<br />\r\n";
if ($timed_out)
    echo "Search time out, give up<br />\r\n";
echo 'Search complete, get ' . $count . ' results, cost ' . (microtime(true) - $begin) . " seconds<br />\r\n";
echo '</body></html>';
flush();

/**
 * Escape text for inclusion in the GBK-encoded HTML page.
 *
 * The escaping is done byte-wise (ISO-8859-1) on purpose: htmlspecialchars()
 * has no GBK mode, and GBK trail bytes (0x40-0x7E, 0x80-0xFE) never collide
 * with the HTML special characters & < > " ', so multi-byte characters pass
 * through intact.
 *
 * @param string $text Raw text (user input or file content).
 * @return string Text safe to echo into HTML.
 */
function search_html_escape($text) {
    return htmlspecialchars($text, ENT_QUOTES, 'ISO-8859-1');
}
/**
 * Recursively collect regular files matching a glob pattern.
 *
 * Every regular file found is appended to the global $filelist array;
 * every directory found is descended into with "<dir>/*". Entries whose
 * file name is exactly "search.php" are skipped (presumably so this script
 * does not search itself - confirm the deployed file name).
 *
 * @param string $dbsource Glob pattern, e.g. 'd:\\data\\*'.
 * @return void
 */
function get_file_list($dbsource) {
    global $filelist;
    $current_file_list = glob($dbsource);
    // glob() returns false on error; nothing to iterate in that case.
    if ($current_file_list === false) {
        return;
    }
    foreach ($current_file_list as $each) {
        // The original test "strpos(...) === true" could never be true
        // (strpos returns an int or false), so nothing was ever skipped.
        if (basename($each) === 'search.php') {
            continue;
        }
        if (is_file($each)) {
            $filelist[] = $each;
        } elseif (is_dir($each)) {
            // DIRECTORY_SEPARATOR is '\' on Windows (as before) and '/'
            // elsewhere, where a backslash would act as glob's escape character.
            get_file_list($each . DIRECTORY_SEPARATOR . '*');
        }
    }
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment