Skip to content

Instantly share code, notes, and snippets.

@dimmduh
Created June 14, 2012 13:36
Show Gist options
  • Save dimmduh/2930330 to your computer and use it in GitHub Desktop.
Save dimmduh/2930330 to your computer and use it in GitHub Desktop.
<?php
// Scraper for bash.org.ru (bash.im): walks the "by rating" listing pages
// 1..100 and stores every quote found (id, text, date, rating) in the
// `quotes` table of the local SQLite database file "bash".
#error_reporting(0);
set_time_limit(0);
ini_set('memory_limit', '512M');
$db = new SQLite3('bash');
#$db->query('DELETE FROM quotes;');

// A single prepared statement is reused for every row. Bound parameters
// replace the string-built SQL, so quote text can never break (or inject
// into) the statement, and the removed ext/sqlite helper
// sqlite_escape_string() is no longer needed.
$insert = $db->prepare('INSERT INTO quotes(id, text, date, rating) VALUES (:id, :text, :date, :rating)');
if ($insert === false) {
    $error = $db->lastErrorMsg();
    $db->close();
    exit('Cannot prepare INSERT statement: ' . $error);
}

for ($i = 1; $i <= 100; $i++) {
    $url = 'http://bash.im/byrating/' . $i;
    if (!url_exists($url)) {
        continue;
    }

    $content = file_get_contents($url);
    if ($content === false) {
        // The page answered the header probe but could not be downloaded; skip it.
        continue;
    }

    // Quote id and rating come from the same element, so $ids and $ratings
    // are always index-aligned.
    preg_match_all('/<span id="v([0-9]+)" class="rating">([0-9]+)<\/span>/i', $content, $matches);
    $ids = $matches[1];
    $ratings = $matches[2];

    // Lazy captures: stop at the first closing tag so that several elements
    // sharing one line are not merged into a single match.
    preg_match_all('/<div class="text">(.*?)<\/div>/im', $content, $matches);
    $texts = $matches[1];
    preg_match_all('/<span class="date">(.*?)<\/span>/i', $content, $matches);
    $text_dates = $matches[1];

    foreach ($ids as $key => $id) {
        if (!isset($texts[$key], $text_dates[$key])) {
            // The three patterns found a different number of items; there is
            // no text/date to pair with this id.
            continue;
        }

        // Turn HTML line breaks into real newlines.
        $text = str_replace(array('<br />', '<br/>', '<br>'), "\n", $texts[$key]);
        // Replace HTML entities. '&amp;' is decoded last so that an escaped
        // entity such as '&amp;#039;' is not decoded twice.
        $text = str_replace(
            array('&quot;', '&lt;', '&gt;', '&#8206;', '&#039;', '&amp;'),
            array('"', '<', '>', '', '\'', '&'),
            $text
        );

        // All values are bound as text, matching the quoted literals of the
        // original statement, so the table's column affinity decides the
        // stored type. The page content is converted from windows-1251 to UTF-8.
        $insert->bindValue(':id', $id, SQLITE3_TEXT);
        $insert->bindValue(':text', (string) iconv('windows-1251', 'utf-8', $text), SQLITE3_TEXT);
        $insert->bindValue(':date', (string) strtotime($text_dates[$key]), SQLITE3_TEXT);
        $insert->bindValue(':rating', $ratings[$key], SQLITE3_TEXT);

        // Best effort, as before: a failed insert does not stop the run.
        $insert->execute();
        $insert->reset();
    }
}

$insert->close();
$db->close();
echo 'finished!';
/**
 * Checks whether a URL answers with HTTP status 200.
 *
 * Only the first status line returned by get_headers() is inspected, so a
 * response that starts with a redirect is reported as not existing.
 *
 * @param string $url Absolute URL to probe.
 * @return bool True when the first status line carries code 200, false on any
 *              other status, on an empty/non-string URL, or when the headers
 *              could not be fetched.
 */
function url_exists($url) {
    if (!is_string($url) || $url === '') {
        // get_headers() rejects an empty path outright; nothing to probe.
        return false;
    }

    // get_headers() reports failures as warnings and returns false; the
    // warning is suppressed because failure is handled right below.
    $file_headers = @get_headers($url);
    if (!is_array($file_headers) || !isset($file_headers[0])) {
        return false;
    }

    // Accept any protocol version and reason phrase ("HTTP/1.0 200 OK",
    // "HTTP/2 200", ...) instead of one exact string.
    return preg_match('#^HTTP/\d+(?:\.\d+)?\s+200\b#i', $file_headers[0]) === 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment