Skip to content

Instantly share code, notes, and snippets.

@manpages
Created March 6, 2012 00:18
Show Gist options
  • Save manpages/1982404 to your computer and use it in GitHub Desktop.
Save manpages/1982404 to your computer and use it in GitHub Desktop.
Data preparation script for ЛЯМ-search module used at memorici.de
<?php
/*+++ CONFIG +++*/
// Root under which processed pages are mirrored (presumably a RAM-backed mount -- confirm).
$ramfs = '/mnt/ramdisk/';
// Persistent directory holding the downloaded pages, the progress file and the page list.
$dbdir = '/home/sweater/coding/scripting/lyam/data/';
// Base URL of the forum archive.
$url = 'http://forum.sc2tv.ru/archive/';
// Base name of the archived thread; '.html' and '-p-N.html' suffixes are appended when fetching.
$file = 'index.php?t-14423';
// Matches a link whose text consists of digits only (possibly none); the text is captured as 'pagenum'.
$page_pattern = '|<a .*?>(?<pagenum>\d*)</a>|';
/*--- CONFIG ---*/
// Paths derived from the settings above.
$ramdir = "{$ramfs}data/";       // mirror directory for processed pages
$cpfile = "{$dbdir}current.dat"; // stores the number of the last processed page
$pages = "{$dbdir}cache.dat";    // newline-separated list of mirrored page paths
/**
 * Write data to a file: file_put_contents() with the arguments swapped
 * (data first, path second).
 *
 * @param mixed  $x Data to write.
 * @param string $y Destination path.
 * @return int|false Result of file_put_contents(): bytes written, or false on failure.
 */
function fpc($x, $y)
{
    $written = file_put_contents($y, $x);

    return $written;
}
/**
 * Read a whole file (or URL) into a string; shorthand for file_get_contents().
 *
 * @param string $x Path or URL to read.
 * @return string|false The contents, or false when file_get_contents() fails.
 */
function fgc($x)
{
    $contents = file_get_contents($x);

    return $contents;
}
/**
 * Return the last element of an array.
 *
 * For an empty array (or a non-array argument) null is returned without
 * raising an "undefined offset" warning. The positionally last element is
 * returned regardless of the array's keys, so associative and sparse
 * arrays work as well as plain lists.
 *
 * @param array $a Array to inspect.
 * @return mixed|null The last element, or null when there is none.
 */
function last($a)
{
    if (!is_array($a) || count($a) === 0) {
        return null;
    }

    // $a is a by-value copy, so moving its internal pointer does not affect the caller.
    return end($a);
}
/**
 * Fetch the thread's index page and return the text of the last numeric
 * page link found in it.
 *
 * Reads the globals $url and $file to build the index URL and
 * $page_pattern to locate page links.
 *
 * @return string The last non-empty 'pagenum' capture (a string of digits).
 * @throws RuntimeException When the index page cannot be fetched, the pattern
 *                          fails, or no numeric page link is present. Failing
 *                          loudly keeps callers from treating an empty value
 *                          as a page number.
 */
function get_last_page()
{
    global $page_pattern, $url, $file;

    $index_url = $url.$file.'.html';
    $html = fgc($index_url);
    if ($html === false) {
        throw new RuntimeException('Could not fetch thread index: '.$index_url);
    }

    $matches = array();
    if (preg_match_all($page_pattern, $html, $matches) === false) {
        throw new RuntimeException('Page pattern could not be applied: '.$page_pattern);
    }

    // The pattern accepts links with empty text (\d*), so discard captures
    // that contain no digits; array_values() restores 0..n-1 keys for last().
    $candidates = isset($matches['pagenum']) ? $matches['pagenum'] : array();
    $numbers = array_values(array_filter(
        $candidates,
        function ($n) {
            return $n !== '';
        }
    ));

    if (count($numbers) === 0) {
        throw new RuntimeException('No page links found in: '.$index_url);
    }

    return last($numbers);
}
/**
 * Read the stored page number from the progress file named by the global $cpfile.
 *
 * Surrounding newlines and spaces are stripped before the integer cast.
 *
 * @return int The stored page number.
 */
function current_page()
{
    global $cpfile;

    $raw = fgc($cpfile);
    $stripped = trim($raw, "\n ");

    return (int) $stripped;
}
/**
 * Collapse the HTML onto one line, then start a new line after every '<hr />'.
 *
 * @param string $html Markup to reflow.
 * @return string The markup with all "\n" removed and a "\n" inserted after each '<hr />'.
 */
function linearize($html)
{
    $rule = '<hr />';

    // Step 1: drop every existing line break.
    $single_line = str_replace("\n", '', $html);

    // Step 2: re-introduce a break after each horizontal rule.
    return str_replace($rule, $rule."\n", $single_line);
}
/**
 * Download every thread page from the stored current page up to the last
 * page reported by the index, linearize each one, copy it to the mirror
 * directory and record it in the page list.
 *
 * Uses the globals $url, $file, $ramdir, $dbdir, $pages and $cpfile.
 *
 * For each page N in [current_page(), get_last_page()]:
 *   1. remove any stale local copy and download the page with wget into $dbdir;
 *   2. rewrite the downloaded file through linearize();
 *   3. copy it into $ramdir with cp;
 *   4. prepend its $ramdir path to the list file $pages.
 * Afterwards the last page number is written to $cpfile.
 *
 * Failure handling: if no numeric last page is available nothing is touched;
 * if a page cannot be downloaded, its number is stored in $cpfile so the next
 * run resumes there, and processing stops.
 *
 * @return void
 */
function update()
{
    global $url, $file, $ramdir, $dbdir, $pages, $cpfile;

    $cp = current_page();
    $np = get_last_page();
    echo "<<INFO>> cp: $cp, np: $np\n\n\n";

    // Without a numeric last page the loop below would not run and the
    // progress file would be overwritten with garbage, so stop here.
    if (!is_numeric($np)) {
        echo "<<FAIL>> could not determine the last page number\n";
        return;
    }
    $np = (int) $np;

    for ($i = $cp; $i <= $np; ++$i) {
        echo "<<GOGO>> $i\n";
        $i_name = $file.'-p-'.$i.'.html';
        $local = $dbdir.$i_name;

        // Remove a stale copy so wget saves under the plain name instead of "<name>.1".
        if (file_exists($local)) {
            unlink($local);
        }

        // The file name contains '?', a shell glob character: quote every argument.
        shell_exec('wget '.escapeshellarg($url.$i_name).' -P '.escapeshellarg($dbdir));

        // The file is created by an external process; drop cached stat data before checking.
        clearstatcache();
        $html = is_file($local) ? fgc($local) : false;
        if ($html === false) {
            echo "<<FAIL>> $i_name\n";
            // Remember where to resume rather than marking this page as done.
            fpc($i, $cpfile);
            return;
        }

        fpc(linearize($html), $local);
        shell_exec('cp -rvut '.escapeshellarg($ramdir).' '.escapeshellarg($local));

        // Newest entry first; tolerate a list file that does not exist yet.
        $known = is_file($pages) ? fgc($pages) : '';
        fpc($ramdir.$i_name."\n".$known, $pages);
        echo "<<DONE>> $i_name\n";
    }

    fpc($np, $cpfile);
}
// Entry point: run the update, then end the script (update() returns nothing to report).
update();
exit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment