Skip to content

Instantly share code, notes, and snippets.

@hashborgir
Created March 17, 2017 14:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hashborgir/c99ac75e2294f038f06e90b895af2419 to your computer and use it in GitHub Desktop.
Save hashborgir/c99ac75e2294f038f06e90b895af2419 to your computer and use it in GitHub Desktop.
#!/usr/bin/php
<?php
// Runtime configuration: enable the cycle collector and surface every error,
// since this script is run from the command line.
gc_enable();
ini_set('display_errors', 1);
error_reporting(E_ALL);

// rb.php supplies the static R database facade used by the feed parser below
// (R::setup / R::find / R::dispense / R::store). Without it the script could
// only fail later, in the middle of processing a feed, so stop right away
// with a clear message instead.
if (!file_exists('rb.php')) {
    echo 'ERROR: rb.php not found' . PHP_EOL;
    exit(1);
}
require_once 'rb.php';

try {
    // DSN database name and credentials are intentionally left blank in this
    // file and must be filled in before use.
    // NOTE(review): R::setup() may not actually open a connection at this
    // point; confirm against the bundled rb.php whether a dedicated
    // connection test is available.
    if (!R::setup('mysql:host=localhost;dbname=', '', '')) {
        throw new Exception('ERROR: DB not connected' . PHP_EOL);
    }
} catch (Exception $e) {
    echo $e->getMessage();
}
/**
 * Downloads an Atom-style feed, walks its <entry> elements and stores each new
 * article (together with its OpenGraph image) through the static R database
 * facade.
 *
 * All the work is triggered by the constructor: it fetches the feed, parses it
 * and calls get_article(), which in turn fetches OpenGraph data and persists
 * every entry under the hard-coded category "news".
 *
 * Errors are reported by echoing a message; no exception leaves the class.
 */
class FeedParseBase {
    /** @var string Feed URL given to the constructor. */
    protected $_url;
    /** @var string|false Raw feed body as returned by file_get_contents(). */
    protected $_data;
    /** @var DOMDocument|null Parsed feed; stays null when download or parsing failed. */
    protected $_dom;
    /** @var array Fields of the entry currently being processed, keyed by child node name. */
    protected $_article;
    /** @var array Stream context options used for the feed and article requests. */
    protected $_options;
    /** @var resource Stream context created from $_options. */
    protected $_context;
    /** @var array OpenGraph properties of the current article; "og:image" is keyed as "og_image". */
    protected $_og_data;

    /**
     * Fetches and parses the feed at $url, then processes all of its entries.
     *
     * @param string $url Feed URL (anything file_get_contents() can open).
     */
    public function __construct($url) {
        $this->_url = $url;
        $this->_article = array();
        $this->_og_data = array();
        $this->_options = array(
            'http' => array(
                'method' => "GET",
                // Each header needs its own CRLF-terminated line; plain
                // concatenation would fuse the three headers into one.
                'header' => implode("\r\n", array(
                    'Accept-Language: en-US,en;q=0.8,ja;q=0.6',
                    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Cookie: querylyvid=1147956133',
                )) . "\r\n",
            ),
        );
        $this->_context = stream_context_create($this->_options);
        try {
            $this->_data = file_get_contents($this->_url, false, $this->_context);
            if (!$this->_data) {
                throw new Exception("ERROR: 403/404 or otherwise not found" . PHP_EOL);
            }
            // Parse the body that was just downloaded so the feed is requested
            // only once and always with the headers configured above.
            $dom = new DOMDocument();
            if ($dom->loadXML($this->_data)) {
                $this->_dom = $dom;
            }
        } catch (Exception $e) {
            echo $e->getMessage();
        }
        $this->get_article();
    }

    /**
     * Releases the per-feed state and forces a cycle collection.
     */
    public function __destruct() {
        unset($this->_article);
        unset($this->_context);
        unset($this->_data);
        unset($this->_dom);
        unset($this->_og_data);
        gc_collect_cycles();
    }

    /**
     * Processes every <entry> of the parsed feed: collects its child nodes
     * into $_article, resolves the real article link, loads the article's
     * OpenGraph data and saves the result under the "news" category.
     *
     * Echoes "ERROR: Dom not loaded" when no feed document is available.
     */
    public function get_article() {
        try {
            if (!$this->_dom) {
                throw new Exception('ERROR: Dom not loaded' . PHP_EOL);
            }
            foreach ($this->_dom->getElementsByTagName('entry') as $entry) {
                // Start from a clean slate so fields or images of the previous
                // entry can never leak into this one.
                $this->_article = array();
                $this->_og_data = array();

                foreach ($entry->childNodes as $childNode) {
                    $name = $childNode->nodeName;
                    $this->_article[$name] = $childNode->nodeValue;
                    if ($name == 'link') {
                        // The <link> text is empty; the target lives in href.
                        $this->_article['link'] = $this->resolve_link($childNode->getAttribute('href'));
                    } elseif ($name == 'content') {
                        $this->_article['description'] = $childNode->nodeValue;
                    }
                }

                if (empty($this->_article['link'])) {
                    echo 'ERROR: Entry without link skipped' . PHP_EOL;
                    continue;
                }
                $this->get_article_og($this->_article['link']);
                $this->save_to_db('news');
            }
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    }

    /**
     * Downloads the article at $url and collects all of its
     * <meta property="og:..."> tags into $_og_data, with ":" in the property
     * name replaced by "_".
     *
     * When the page cannot be fetched, the failure is echoed and the URL is
     * recorded in the "nog" table.
     *
     * @param string $url Article URL.
     * @return bool true when the page was fetched and parsed, false otherwise.
     */
    protected function get_article_og($url) {
        try {
            $html = $this->is_http_url($url)
                ? @file_get_contents($url, false, $this->_context)
                : false;
            if (!$html) {
                echo 'ERROR: OpenGraph = ' . $url . PHP_EOL;
                $this->remember_failed_url($url);
                return false;
            }

            $doc = new DomDocument();
            // Real-world HTML is rarely well formed; parser warnings are noise here.
            @$doc->loadHTML($html);
            $xpath = new DOMXPath($doc);
            $metas = $xpath->query('//*/meta[starts-with(@property, \'og:\')]');

            $rmetas = array();
            foreach ($metas as $meta) {
                $property = str_replace(':', '_', $meta->getAttribute('property'));
                $rmetas[$property] = $meta->getAttribute('content');
            }
            $this->_og_data = $rmetas;

            // Pause between article requests to avoid hammering remote sites.
            sleep(1);
            return true;
        } catch (Exception $e) {
            echo $e->getMessage();
        }
        return false;
    }

    /**
     * Stores the current article unless one with the same link already exists.
     *
     * An article is only stored when its OpenGraph image is available in
     * cache/ (either cached earlier or downloaded now). A freshly downloaded
     * image also gets a thumbnail in cache/thumb/ via the "convert" command.
     * Every reason for not storing is echoed.
     *
     * @param string $category Value written to the article's category field.
     */
    protected function save_to_db($category) {
        try {
            if (!isset($this->_article['link'])) {
                throw new Exception('ERROR: Not Stored - article has no link' . PHP_EOL);
            }
            $link = $this->_article['link'];

            if (R::find('article', ' link = ?', array($link))) {
                throw new Exception('ERROR: Already Exists - ' . $link . PHP_EOL);
            }

            $image_url = isset($this->_og_data['og_image']) ? $this->_og_data['og_image'] : '';
            // The image URL comes from a remote page; only plain web URLs may
            // be handed to file_get_contents().
            if (!$this->is_http_url($image_url)) {
                throw new Exception('ERROR: Not Stored - ' . $link . PHP_EOL);
            }

            $filename = md5($image_url) . ".jpg";
            $cache_path = 'cache/' . $filename;
            if (!file_exists($cache_path)) {
                $file = @file_get_contents($image_url);
                if (!$file || file_put_contents($cache_path, $file) === false) {
                    throw new Exception('ERROR: Not Stored - ' . $link . PHP_EOL);
                }
                $source = 'cache' . DIRECTORY_SEPARATOR . $filename;
                $thumb = 'cache' . DIRECTORY_SEPARATOR . 'thumb' . DIRECTORY_SEPARATOR . $filename;
                exec('convert ' . escapeshellarg($source)
                    . ' -background none -resize 480x360 -quality 65 -sharpen 2x4 '
                    . escapeshellarg($thumb));
            }

            date_default_timezone_set('America/Chicago');
            $time = isset($this->_article['published']) ? strtotime($this->_article['published']) : false;
            if ($time === false) {
                // A missing or unparsable date would otherwise be stored as
                // the Unix epoch; fall back to the time of ingestion.
                $time = time();
            }

            $article = R::dispense('article');
            $article->title = isset($this->_article['title']) ? $this->_article['title'] : null;
            $article->description = isset($this->_article['description']) ? $this->_article['description'] : null;
            $article->link = $link;
            $article->published = date('Y-m-d H:i:s', $time);
            $article->setMeta('cast.published', 'datetime');
            $article->category = $category;
            $article->image_link = $image_url;
            $article->image_file = $filename;
            R::store($article);
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    }

    /**
     * Extracts the real article URL from a feed link: the target is taken from
     * the "url" query parameter of $href (falling back to $href itself when
     * that parameter is absent) and a leading mobile host "http://m." is
     * rewritten to "http://".
     */
    private function resolve_link($href) {
        $query_params = array();
        $query_str = parse_url($href, PHP_URL_QUERY);
        if (is_string($query_str)) {
            parse_str($query_str, $query_params);
        }
        $has_target = isset($query_params['url'])
            && is_string($query_params['url'])
            && $query_params['url'] !== '';
        $target = $has_target ? $query_params['url'] : $href;
        return str_replace('http://m.', 'http://', $target);
    }

    /** Tells whether $url is a string starting with http:// or https://. */
    private function is_http_url($url) {
        return is_string($url) && preg_match('#^https?://#i', $url) === 1;
    }

    /** Records $url in the "nog" table (once) as a page without OpenGraph data. */
    private function remember_failed_url($url) {
        if (!R::find('nog', ' url = ?', array($url))) {
            $nog = R::dispense('nog');
            $nog->url = $url;
            R::store($nog);
        }
    }
}
/**
 * Feed parser used for Google Alerts feeds. It declares nothing of its own;
 * every bit of behaviour is inherited from FeedParseBase.
 */
class GoogleFeed extends FeedParseBase
{
}
// Feed URLs to ingest, one per line, labelled by topic.
$feeds = array(
    // Cannabis
    "https://www.google.com/alerts/feeds/05402891751147148632/10666330494545493357",
);

// Constructing a parser processes the whole feed; the instance is dropped
// immediately afterwards so its destructor runs before the next feed starts.
foreach ($feeds as $feed) {
    $parser = new GoogleFeed($feed);
    unset($parser);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment