-
-
Save pavel-voronin/e1656b6a3c2cde6c4f83 to your computer and use it in GitHub Desktop.
The-Tale.org accounts info grabber
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Archive finished days of scraper output.
#
# Snapshots are named result/<name>-YYYY-MM-DD-HH-MM-SS.csv. Cutting the last
# 13 characters ("-HH-MM-SS.csv") off each path leaves a "<name>-<date>"
# prefix; every prefix whose date is not today is processed as one group:
#   1. move the group's snapshots into result/temp,
#   2. write a zero-context unified diff of each snapshot against the
#      previous one,
#   3. delete every full snapshot except the first (the diffs remain),
#   4. pack what is left into <prefix>.tar.gz and remove result/temp.
today=$(date +"%Y-%m-%d")

for date_part in $(ls -1 result/*.csv | rev | cut -c 14- | rev | uniq | awk -v date="$today" 'substr($0, length($0) - 9) != date')
do
    # Start from an empty scratch directory (rm -rf is a no-op if it is absent).
    rm -rf result/temp
    mkdir result/temp
    mv "$date_part"-* result/temp

    prev=''
    # The glob expands in sorted order, i.e. chronologically for these names.
    for O in result/temp/*.csv
    do
        if [ -n "$prev" ]; then
            diff -u0 "$prev" "$O" > "$O.diff"
        fi
        prev=$O
    done

    # Keep only the first full snapshot; later ones are represented by diffs.
    ls -1 result/temp/*.csv | tail -n+2 | xargs rm -f
    tar -zcf "$date_part.tar.gz" -C result/temp/ .
    rm -rf result/temp
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/php | |
<?php | |
/*
 * Scraper configuration.
 *
 * $domain          site root that every url path below is appended to.
 * $common_pattern  regex for one table row of a /game/ratings/* page.
 * $urls            url name => [url path ending in "page=", row regex].
 *                  Later code appends further numeric slots to each entry
 *                  (work directory, page count, next page, compiled flag).
 */
$domain = 'http://the-tale.org';

// Captures per row: number in the first cell, account id, account name,
// two words preceding the hero link, hero id, hero name, last cell text.
$common_pattern = '/<tr>\s*<td>(\d+)<\/td>\s*<td><a href="\/accounts\/(\d+)">([^<]+)<\/a><\/td>\s*<td>(\w+)\s*(\w+)\s*<a href="\/game\/heroes\/(\d+)">([^<]+)<\/a><\/td>\s*<td>([^<]+)<\/td>\s*<\/tr>/su';

$urls = [];

// Rating tables: all share the same row layout.
$urls['help_count'] = ['/game/ratings/help_count?page=', $common_pattern];
$urls['achievements_points'] = ['/game/ratings/achievements_points?page=', $common_pattern];
$urls['referrals_number'] = ['/game/ratings/referrals_number?page=', $common_pattern];
$urls['pvp_battles_1x1_victories'] = ['/game/ratings/pvp_battles_1x1_victories?page=', $common_pattern];
$urls['pvp_battles_1x1_number'] = ['/game/ratings/pvp_battles_1x1_number?page=', $common_pattern];
$urls['phrases'] = ['/game/ratings/phrases?page=', $common_pattern];
$urls['level'] = ['/game/ratings/level?page=', $common_pattern];
$urls['power'] = ['/game/ratings/power?page=', $common_pattern];
$urls['bills'] = ['/game/ratings/bills?page=', $common_pattern];
$urls['might'] = ['/game/ratings/might?page=', $common_pattern];

// Account list: its own row layout, with an optional clan link after the
// account link.
$urls['accounts'] = ['/accounts/?prefix=&page=', '/<tr class="pgf-account-record">\s*<td>\s*<a href="\/accounts\/(\d+)">([^<]+)<\/a>\s*(?:<a href="\/accounts\/clans\/(\d+)">\[([^\]]+)\]<\/a>\s*)?<\/td>\s*<td>(\w+)<\/td>\s*<td class="updated-at pgf-format-date" data-timestamp="(\d+).0"><\/td>\s*<td>\s*(\w+)\s*(\w+)\s*<a href="\/game\/heroes\/(\d+)">([^<]+)<\/a>\s*<\/td>\s*<td>(\d+)<\/td>\s*<td>(\d+)<\/td>\s*<\/tr>/su'];
/**
 * Abort unless the script can do its job from the current directory.
 *
 * Requires the CLI SAPI with $argc/$argv available, a writable current
 * directory and a writable `result` directory (created here when missing).
 * Every failed check terminates the script via die() with a message.
 */
function checkEnvironment()
{
    if (php_sapi_name() !== 'cli') {
        die('Run me from console, not web.' . PHP_EOL);
    }

    global $argv, $argc;
    if (!isset($argc) || !isset($argv)) {
        die('Where are you talking to me from?' . PHP_EOL);
    }

    if (!is_writable('.')) {
        die('Current directory is not writable by me.' . PHP_EOL);
    }

    if (!file_exists('result')) {
        @mkdir('result');
        if (!file_exists('result')) {
            die('I cannot create result folder. Check my rights.' . PHP_EOL);
        }
        @chmod('result', 0777);
    } elseif (!is_dir('result')) {
        die('`Result\' is not a directory. Move it out of current directory and restart me.' . PHP_EOL);
    }

    // Checked for a pre-existing directory too, so a read-only `result`
    // is reported now rather than after a whole url has been grabbed.
    if (!is_writable('result')) {
        die('Result directory is not writable by me.' . PHP_EOL);
    }
}
/**
 * Load the saved project state from the `.project` file.
 *
 * @return array|null The project array, or null when no `.project` exists.
 *                    Dies when the file does not return an array or names
 *                    an unknown url (see checkProject()).
 */
function getProject()
{
    if (!file_exists('.project')) {
        return null;
    }

    // Plain `require`, not `require_once`: the latter returns true instead
    // of the file's return value when the file was already included, which
    // would make a valid project look corrupted on a second call.
    $project = @require('.project');
    if (!is_array($project)) {
        die('Project file is corrupted. Delete it and restart me.' . PHP_EOL);
    }

    checkProject($project);

    return $project;
}
/**
 * Build an absolute url from a site-relative, url-encoded path.
 *
 * @param string $part Path relative to the site root.
 * @return string The global $domain followed by the url-decoded path.
 */
function getUrl($part)
{
    // Without this declaration $domain is an undefined local and the
    // returned url would lack the host part.
    global $domain;

    return $domain . urldecode($part);
}
/**
 * Make sure every entry of a loaded project refers to a known url.
 *
 * An entry is known when its url path (slot 0) is identical to slot 0 of
 * some entry in the global $urls table. Dies on the first unknown entry.
 *
 * @param array $project Project entries as stored in `.project`.
 */
function checkProject($project)
{
    global $urls;

    foreach ($project as $url) {
        $known = false;

        foreach ($urls as $one) {
            if ($url[0] === $one[0]) {
                $known = true;
                break;
            }
        }

        if (!$known) {
            die('Unknown url: ' . $url[0] . PHP_EOL);
        }
    }
}
/**
 * Print usage information and terminate the script with status 0.
 *
 * With no command (or `help` without an argument) the command overview is
 * printed; `help <command>` prints the text for that command. Any other
 * $command prints nothing before exiting.
 *
 * @param string|null $command Command word from the command line.
 * @param string|null $arg     Command to describe when $command is `help`.
 */
function help($command = null, $arg = null)
{
    $self = basename(__FILE__);
    $usage = 'Usage: php ' . $self;
    $lines = [];

    if ($command == null || $command == 'help' && $arg == null) {
        $more = ' php ' . $self;
        $lines[] = $usage . ' help this help page';
        $lines[] = $more . ' help <command> specific command help';
        $lines[] = $more . ' urls show all known urls';
        $lines[] = $more . ' start <all|url1 url2 ... urlN> start grabbing of all urls or one or many specific urls';
        $lines[] = $more . ' batch <all|url1 url2 ... urlN> create batch project for grabbing of all urls or one or many specific urls';
        $lines[] = $more . ' step continue current batch project (1 step)';
        $lines[] = $more . ' finish continue and finish current batch project';
        $lines[] = $more . ' clean clean current ';
    } elseif ($command == 'help') {
        // switch compares loosely, exactly like the `==` chain it replaces.
        switch ($arg) {
            case 'help':
                $lines[] = 'Do you love recursions, ah? :)';
                break;

            case 'urls':
                $lines[] = $usage . ' urls';
                $lines[] = '';
                $lines[] = ' Show all known urls (names). You can use them with commands `start\' and `batch\'.';
                break;

            case 'start':
                $lines[] = $usage . ' start <all|url1 url2 ... urlN>';
                $lines[] = '';
                $lines[] = ' If `all\' keyword specified, start grabbing of all known urls immediately.';
                $lines[] = ' Otherwise, if one or some url names specified, start grabbing of this specific urls immediately.';
                break;

            case 'batch':
                $lines[] = $usage . ' batch <all|url1 url2 ... urlN>';
                $lines[] = '';
                $lines[] = ' If `all\' keyword specified, create new project for grabbing of all known urls immediately.';
                $lines[] = ' Otherwise, if one or some url names specified, create new project for grabbing of this specific urls immediately.';
                $lines[] = ' In all cases grabbing will not start immediately, but you should start every step with yourself with instructions shown after project creating.';
                break;

            case 'step':
                $lines[] = $usage . ' step';
                $lines[] = '';
                $lines[] = ' Make one step of current project made with `batch\' command. If you want to start new project, `clean\' the old one.';
                break;

            case 'finish':
                $lines[] = $usage . ' finish';
                $lines[] = '';
                $lines[] = ' Make all steps of current project made with `batch\' command. If you want to start new project, `clean\' the old one.';
                $lines[] = ' My default command if project was started.';
                break;

            case 'clean':
                $lines[] = $usage . ' clean';
                $lines[] = '';
                $lines[] = ' Delete current project files and project itself from current directory. Result files will stay.';
                break;

            default:
                $lines[] = 'I don\'t know this command. Sorry, dude.';
        }
    }

    // Each collected line is terminated by PHP_EOL; nothing is printed
    // when no branch above matched.
    if ($lines) {
        echo implode(PHP_EOL, $lines) . PHP_EOL;
    }

    exit(0);
}
/**
 * Print the names (keys of the global $urls table) that `start` and
 * `batch` accept, comma-separated on one line.
 */
function knownUrls()
{
    global $urls;

    $names = array_keys($urls);

    echo 'Known url names: ', implode(', ', $names), PHP_EOL;
}
/**
 * Create a project for the requested urls and run it to completion.
 *
 * @param string|array $urls The string 'all' or a list of url names,
 *                           passed through to createProject().
 */
function startProject($urls)
{
    echo 'I am starting new project for you!' . PHP_EOL;

    finishProject(createProject($urls));
}
/**
 * Run project steps back to back until stepProject() returns a falsy
 * value. Returns nothing.
 *
 * @param array $project Current project state.
 */
function finishProject($project)
{
    do {
        $project = stepProject($project, true);
    } while ($project);
}
/**
 * Determine how many pages a url has and store the count in slot 3.
 *
 * Downloads page 1 and reads the number that follows the LAST occurrence
 * of the url path in the document.
 *
 * @param array $url Project entry; slot 0 is the url path ending in "page=".
 * @return array The entry with slot 3 set to the page count (at least 1).
 */
function calculateSize($url)
{
    global $domain;

    // Page 1 is always the one inspected; the "next page" slot ($url[4])
    // may not exist yet, so it must not be used in the error message.
    $firstPage = $domain . urldecode($url[0]) . 1;

    if (($doc = file_get_contents($firstPage)) === false) {
        die('I failed to grab url ' . $firstPage . ' to examine total pages count :(' . PHP_EOL);
    }

    $pages = 0;
    $position = mb_strrpos($doc, $url[0]);

    // When the path never occurs in the document there is no pager link to
    // read; treating `false` as offset 0 would parse unrelated text.
    if ($position !== false) {
        // NOTE(review): only 3 characters are inspected, so counts above
        // 999 would be truncated — confirm whether that can happen.
        $tail = mb_substr($doc, $position + mb_strlen($url[0]), 3);
        $pages = (int)preg_replace('/[^\d]/', '', $tail);
    }

    $url[3] = $pages ? $pages : 1;

    return $url;
}
/**
 * Fill in the page count of every project entry and persist the result.
 *
 * @param array $project Project entries keyed by url name.
 * @return array The project with each entry passed through calculateSize().
 */
function calculateSizes($project)
{
    // array_map() with a single array keeps the url-name keys.
    $project = array_map('calculateSize', $project);

    saveProject($project);

    return $project;
}
/**
 * Create a project, size every url and write `start.sh`, a script made of
 * one `php <this file> step` line per expected step.
 *
 * @param string|array $urls The string 'all' or a list of url names,
 *                           passed through to createProject().
 */
function batchProject($urls)
{
    echo 'I am batching new project for you!' . PHP_EOL;

    $project = calculateSizes(createProject($urls));

    foreach ($project as $urlName => $url) {
        if (!isset($url[3])) {
            die('I don\'t know, why, but i don\'t know, how many pages are in url ' . $urlName . '.' . PHP_EOL);
        }
    }

    $shFile = fopen('start.sh', 'w');
    if ($shFile === false) {
        die('I cannot create start.sh. Do I have enough rights?' . PHP_EOL);
    }

    $stepLine = 'php ' . basename(__FILE__) . ' step' . PHP_EOL;

    foreach ($project as $url) {
        // Page count plus two lines per url (1 for compiling).
        for ($left = $url[3] + 2; $left > 0; $left--) {
            fwrite($shFile, $stepLine);
        }
    }

    fclose($shFile);

    echo 'Done! Now you can run `sh start.sh\' to process all the project OR you can use me that way: `php ' . basename(__FILE__) . ' step\' to get things done step-by-step.' . PHP_EOL;
}
/**
 * Perform the next unit of work of a project and persist the new state.
 *
 * Entry slots used here: [0] url path, [1] row regex, [2] work directory,
 * [3] page count, [4] next page to grab, [5] set once the url's pages have
 * been compiled into a csv under result/.
 *
 * The first entry without slot 5 is advanced by exactly one of: sizing the
 * url, grabbing one page into <work dir>/<page>, or compiling all page
 * files into result/<name>-<timestamp>.csv. When every entry has slot 5
 * the project is cleaned up.
 *
 * @param array $project  Project entries keyed by url name.
 * @param bool  $continue When true the function returns after the step;
 *                        otherwise it ends the script with exit(0).
 * @return array|false Updated project, or false once the project has been
 *                     finished and cleaned.
 */
function stepProject($project, $continue = false)
{
    global $domain;

    foreach ($project as $urlName => $url) {
        if (isset($url[5])) {
            continue;
        }

        // Work directory: created once, named after the url.
        if (!isset($url[2])) {
            $url[2] = $urlName;
            @mkdir($url[2]);
            @chmod($url[2], 0777);

            if (!file_exists($url[2])) {
                die('I am unable to create a project directory :(' . PHP_EOL);
            }
            if (!is_writable($url[2])) {
                @rmdir($url[2]);
                die('Created project folder is not writable. I\'m sorry.' . PHP_EOL);
            }

            echo 'I have created directory ' . $url[2] . ' for url ' . $urlName . '.' . PHP_EOL;
            $project[$urlName] = $url;
            saveProject($project);
        }

        // Page count: sizing a url is a step of its own.
        if (!isset($url[3])) {
            $url = calculateSize($url);
            $project[$urlName] = $url;
            saveProject($project);
            echo 'Url ' . $urlName . ' have ' . $url[3] . ' pages.' . PHP_EOL;

            if ($continue) {
                return $project;
            }
            exit(0);
        }

        if (!isset($url[4])) {
            $url[4] = 1;
            $project[$urlName] = $url;
            saveProject($project);
        }

        // All pages grabbed: compile the page files into one csv.
        if ($url[3] < $url[4]) {
            $csvName = 'result/' . $urlName . '-' . date('Y-m-d-H-i-s') . '.csv';
            echo 'I have fully grabbed url ' . $urlName . ' and now I am trying to compile it.' . PHP_EOL;

            if (($csv = fopen($csvName, 'w')) === false) {
                die('I cannot create csv file. Do I have enough rights?' . PHP_EOL);
            }

            // Page files are named 1..N. Natural ordering concatenates them
            // in page order (1, 2, ..., 10) instead of whatever order the
            // filesystem reports, keeping rows ordered and snapshots
            // comparable with each other.
            $pieces = array_diff(scandir($url[2]), ['.', '..']);
            natsort($pieces);
            foreach ($pieces as $file) {
                fwrite($csv, file_get_contents($url[2] . '/' . $file));
            }
            fclose($csv);

            $url[5] = true;
            $project[$urlName] = $url;
            saveProject($project);
            echo 'Done! I have compiled data from url ' . $urlName . ' into ' . $csvName . PHP_EOL;

            if ($continue) {
                return $project;
            }
            exit(0);
        }

        // Grab the next page.
        echo 'I am starting a new step of grabbing url ' . $urlName . ' (' . $url[4] . '/' . $url[3] . ')' . PHP_EOL;

        $pageUrl = $domain . urldecode($url[0]) . $url[4];
        if (($doc = file_get_contents($pageUrl)) === false) {
            die('I failed to grab url ' . $pageUrl . ' :( Try `step\' later.' . PHP_EOL);
        }

        // A regex failure (e.g. invalid UTF-8 under the /u modifier) must
        // not be recorded as a successfully grabbed, empty page.
        if (preg_match_all($url[1], $doc, $data, PREG_SET_ORDER) === false) {
            die('I failed to parse url ' . $pageUrl . ' :( Try `step\' later.' . PHP_EOL);
        }

        // Drop the full-match element so only the captured fields remain.
        foreach ($data as $id => $one) {
            unset($data[$id][0]);
        }

        if (($csv = fopen($url[2] . '/' . $url[4], 'w')) === false) {
            die('I cannot create piece of data csv file. Do I have enough rights?' . PHP_EOL);
        }
        foreach ($data as $row) {
            fputcsv($csv, $row);
        }
        fclose($csv);

        $url[4]++;
        $project[$urlName] = $url;
        saveProject($project);
        echo 'Done!' . PHP_EOL;

        if ($continue) {
            return $project;
        }
        exit(0);
    }

    // Every url has been compiled: remove the project.
    echo 'Wow! I have finished my work and now i\' m gonna clean this project.' . PHP_EOL;
    cleanProject($project);
    echo 'Done! Have a nice day!' . PHP_EOL;

    if ($continue) {
        return false;
    }
    exit(0);
}
/**
 * Persist the project state to `.project` as a PHP file that returns the
 * array, so it can be loaded back with require.
 *
 * Dies when the file cannot be written: continuing would silently lose
 * progress that later steps rely on.
 *
 * @param array $project Project entries keyed by url name.
 */
function saveProject($project)
{
    $code = '<?php' . PHP_EOL . 'return ' . var_export($project, true) . ';';

    if (file_put_contents('.project', $code) === false) {
        die('I cannot save project file. Do I have enough rights?' . PHP_EOL);
    }
}
/**
 * Build a new project from the global $urls table, save it and announce it.
 *
 * @param string|array $need_urls The string 'all' for every known url, or
 *                                a list of url names. Dies on an unknown
 *                                name.
 * @return array Project entries keyed by url name.
 */
function createProject($need_urls)
{
    global $urls;

    if ($need_urls === 'all') {
        $project = $urls;
    } else {
        $project = [];

        foreach ($need_urls as $name) {
            if (!isset($urls[$name])) {
                die('Unknown url name! Please, use only names from `urls\' command.' . PHP_EOL);
            }
            $project[$name] = $urls[$name];
        }
    }

    saveProject($project);

    echo 'I have created project for urls: ' . implode(', ', array_keys($project)) . PHP_EOL;

    return $project;
}
/**
 * Delete everything a project left in the current directory: `.project`,
 * `start.sh` and each entry's work directory with the files inside it.
 * Files under result/ are not touched. Dies when something cannot be
 * removed.
 *
 * @param array $project Project entries; slot 2 is the work directory.
 */
function cleanProject($project)
{
    @unlink('.project');
    if (file_exists('.project')) {
        die('I failed to delete project! What is wrong with my rights?' . PHP_EOL);
    }

    if (file_exists('start.sh')) {
        @unlink('start.sh');
        if (file_exists('start.sh')) {
            die('I failed to delete start.sh! What is wrong with my rights?' . PHP_EOL);
        }
    }

    foreach ($project as $url) {
        if (!isset($url[2])) {
            continue;
        }
        $dir = $url[2];

        // The directory may already be gone (e.g. removed by hand);
        // scandir() would then return false instead of a list.
        if (is_dir($dir)) {
            foreach (scandir($dir) as $path) {
                if ($path == '.' || $path == '..') {
                    continue;
                }
                @unlink($dir . '/' . $path);
                if (file_exists($dir . '/' . $path)) {
                    die('I failed to delete project files! What is wrong with my rights?' . PHP_EOL);
                }
            }
            @rmdir($dir);
        }

        if (file_exists($dir)) {
            die('I failed to delete project files! What is wrong with my rights?' . PHP_EOL);
        }
    }
}
/**
 * Dispatch on the command line ($argv[1]) using the global $project state.
 *
 * Without a command: show help when there is no project, otherwise finish
 * the current one. An unrecognised command does nothing.
 */
function route()
{
    global $argc, $argv, $project;

    if ($argc == 1) {
        if ($project === null) {
            help();
        } else {
            $project = finishProject($project);
        }
        return;
    }

    // switch compares loosely, exactly like the `==` chain it replaces.
    switch ($argv[1]) {
        case 'help':
            if (isset($argv[2])) {
                help($argv[1], $argv[2]);
            } else {
                help($argv[1]);
            }
            break;

        case 'urls':
            knownUrls();
            break;

        case 'start':
        case 'batch':
            // Both commands validate their input the same way and differ
            // only in the function that receives the url selection.
            if ($project !== null) {
                die('I can\'t because of existing project. `clean\' it first.' . PHP_EOL);
            }
            if ($argc < 3) {
                die('What urls should I grab, master? Maybe, all of them?' . PHP_EOL);
            }

            $wanted = in_array('all', $argv) ? 'all' : array_slice($argv, 2);

            if ($argv[1] == 'start') {
                startProject($wanted);
            } else {
                batchProject($wanted);
            }
            break;

        case 'step':
            if ($project === null) {
                die('No one project was started. First run `batch\' command.' . PHP_EOL);
            }
            $project = stepProject($project);
            break;

        case 'finish':
            if ($project === null) {
                die('No one project was started. First run `batch\' command.' . PHP_EOL);
            }
            $project = finishProject($project);
            break;

        case 'clean':
            if ($project !== null) {
                cleanProject($project);
            }
            exit(0);
    }
}
// Script entry point. Order matters: the environment is validated first,
// then any saved project state is loaded into the global $project that
// route() reads, and finally the command line is dispatched.
checkEnvironment(); | |
$project = getProject(); | |
route(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment