Skip to content

Instantly share code, notes, and snippets.

@matsubo
Created August 30, 2018 20:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matsubo/e6d35679558a7e50984fbc966b60dd73 to your computer and use it in GitHub Desktop.
Save matsubo/e6d35679558a7e50984fbc966b60dd73 to your computer and use it in GitHub Desktop.
fire uploader HTML scraping
<?php
// Scrapes the <td class="img"> cells of one upstream board page so they can be
// rendered as an RSS 2.0 feed by the template that follows.
// Input: optional request parameter `id`, inserted into the board path ("/up<id>/").

// Turns PHP error reporting off; as a side effect no warnings or notices can
// end up inside the feed body.
ini_set('error_reporting', false);
require_once('htmlsql/snoopy.class.php');
require_once('htmlsql/htmlsql.class.php');

// The id is untrusted and is spliced into the upstream URL. Accept only an
// optional plain token so it cannot introduce extra path segments, "..",
// a query string or a fragment. A missing id keeps the old behaviour ("/up/").
// NOTE(review): the set of valid board ids is not visible here - tighten to digits if appropriate.
$id = isset($_REQUEST['id']) ? $_REQUEST['id'] : '';
if (!is_string($id) || !preg_match('/\A[0-9A-Za-z_-]*\z/', $id)) {
    http_response_code(400);
    exit('Invalid id');
}

$wsql = new htmlsql();
$wsql->set_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; ja; rv:1.9.2) Gecko/20100115 Firefox/3.6 Jingoo/0.1.4 (.NET CLR 3.5.30729)');

// Fetch the board page; abort before any feed output is produced if that fails.
if (!$wsql->connect('url', sprintf('http://up.pandoravote.net/up%s/', $id))) {
    throw new Exception('Error while connecting: ' . $wsql->error);
}

// Select every <td> whose class attribute is "img"; the rows are read later via fetch_array().
$wsql->query('SELECT * FROM td WHERE $class == "img"');
// Static RSS 2.0 channel header; everything after the closing tag below is emitted verbatim.
?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>炎のアップローダ(3次エロ画像) RSS </title>
<link>http://matsu.teraren.com/blog/</link>
<atom:link href="http://matsu.teraren.com/fire/rss.php" rel="self" type="application/rss+xml" />
<description>Fire uploader 2 RSS</description>
<pubDate><?php /* Channel timestamp: the current time, formatted with date('r'). */ print date('r'); ?></pubDate>
<?php
// Emit one <item> per scraped cell.
// From each row's 'text' value three things are extracted by regex: the image
// URL, a "*.jpg" file name and a "YY/MM/DD(Day)HH:MM" timestamp.
$rows = $wsql->fetch_array();
if (!is_array($rows)) {
    // Nothing usable came back; render an empty channel instead of iterating a non-array.
    $rows = array();
}

foreach ($rows as $row) {
    $text = isset($row['text']) ? $row['text'] : '';

    // Reset per row: without this a row whose pattern does not match would
    // silently reuse the values extracted from the previous row.
    $image_url = null;
    $filename = null;
    $date_exp = null;

    if (preg_match('/(http:\/\/[^\']+)/', $text, $matches)) {
        $image_url = $matches[1];
    }
    if (preg_match('/([0-9a-zA-Z]+\.jpg)/', $text, $matches)) {
        $filename = $matches[1];
    }
    if (preg_match('#([0-9]{2})/([0-9]{2})/([0-9]{2})\([a-zA-Z]{3}\)([0-9]{2}:[0-9]{2})#', $text, $matches)) {
        // Two-digit year is expanded to 20YY; seconds are not present upstream.
        $date = '20' . $matches[1] . '-' . $matches[2] . '-' . $matches[3] . ' ' . $matches[4] . ':00';
        $timestamp = strtotime($date);
        if ($timestamp !== false) {
            $date_exp = date('r', $timestamp);
        }
    }

    // An item without an image URL has no guid, link or content; skip it.
    if ($image_url === null) {
        continue;
    }

    // Escape for XML. double_encode=false keeps entities that are already
    // present in the scraped markup (e.g. "&amp;") from being encoded twice.
    $url_xml = htmlspecialchars($image_url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    // The description is HTML carried inside XML: build the HTML first, then
    // escape the whole fragment once more for the XML layer.
    $description_html = '<img src="' . $url_xml . '" />';

    print "<item>\n";
    print "<guid>$url_xml</guid>\n";
    if ($filename !== null) {
        print '<title>' . htmlspecialchars($filename, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</title>\n";
    }
    print "<link>$url_xml</link>\n";
    print '<description>' . htmlspecialchars($description_html, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</description>\n";
    if ($date_exp !== null) {
        // pubDate is optional in RSS 2.0; omit it rather than publish a wrong date.
        print "<pubDate>$date_exp</pubDate>\n";
    }
    print "</item>\n";
}
?>
</channel>
</rss>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment