-
-
Save hashborgir/c99ac75e2294f038f06e90b895af2419 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php
<?php
// Runtime bootstrap: turn on the cycle collector and surface every error on
// the console.
error_reporting(E_ALL);
ini_set('display_errors', 1);
gc_enable();

// RedBeanPHP is optional at this point: the database is only configured when
// rb.php sits next to the script. A failed setup is reported on stdout and the
// script keeps running.
if (file_exists('rb.php')) {
    require_once 'rb.php';

    try {
        if (!R::setup('mysql:host=localhost;dbname=', '', '')) {
            throw new Exception('ERROR: DB not connected' . PHP_EOL);
        }
    } catch (Exception $e) {
        echo $e->getMessage();
    }
}
/**
 * Downloads an Atom feed, walks its <entry> elements and, for every entry
 * that carries a target link, fetches the linked page's OpenGraph metadata
 * and persists the article through the RedBeanPHP facade `R`.
 *
 * The whole run is triggered from the constructor. Failures are echoed to
 * stdout instead of being propagated to the caller.
 */
class FeedParseBase
{
    /** Feed URL handed to the constructor. */
    protected $_url;
    /** Raw feed payload as returned by file_get_contents(), or false. */
    protected $_data;
    /** DOMDocument built from the payload; stays null when loading failed. */
    protected $_dom;
    /** Fields of the entry currently being processed, keyed by node name. */
    protected $_article;
    /** Stream context options (HTTP method and request headers). */
    protected $_options;
    /** Stream context shared by the feed request and the article requests. */
    protected $_context;
    /** OpenGraph properties of the current article, keyed like "og_image". */
    protected $_og_data;

    /**
     * Fetches and parses the feed at $url, then processes all its entries.
     *
     * @param string $url Location of the feed (anything file_get_contents() can read).
     */
    public function __construct($url)
    {
        $this->_url = $url;

        // Each header needs its own CRLF-terminated line; plain concatenation
        // would send all three as a single malformed header.
        $headers = array(
            'Accept-Language: en-US,en;q=0.8,ja;q=0.6',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Cookie: querylyvid=1147956133',
        );
        $this->_options = array(
            'http' => array(
                'method' => 'GET',
                'header' => implode("\r\n", $headers) . "\r\n",
            ),
        );
        $this->_context = stream_context_create($this->_options);

        try {
            $this->_data = file_get_contents($this->_url, false, $this->_context);
            if (!$this->_data) {
                throw new Exception("ERROR: 403/404 or otherwise not found" . PHP_EOL);
            }

            // Parse the payload that was just downloaded instead of requesting
            // the URL a second time (and without the request headers).
            $dom = new DOMDocument();
            if (!$dom->loadXML($this->_data)) {
                throw new Exception('ERROR: Feed is not valid XML' . PHP_EOL);
            }
            $this->_dom = $dom;
        } catch (Exception $e) {
            echo $e->getMessage();
        }

        $this->get_article();
    }

    /**
     * Drops the per-run state and asks the collector to reclaim cycles.
     */
    public function __destruct()
    {
        unset($this->_article);
        unset($this->_context);
        unset($this->_data);
        unset($this->_dom);
        unset($this->_og_data);
        gc_collect_cycles();
    }

    /**
     * Processes every <entry> of the loaded feed: collects its child nodes
     * into $_article, resolves the target link, fetches OpenGraph data and
     * saves the article in the 'news' category.
     *
     * Entries without a usable link are reported and skipped.
     */
    public function get_article()
    {
        try {
            if (!$this->_dom) {
                throw new Exception('ERROR: Dom not loaded' . PHP_EOL);
            }

            foreach ($this->_dom->getElementsByTagName('entry') as $entry) {
                // Start clean so fields of a previous entry never leak into
                // this one.
                $this->_article = array();

                foreach ($entry->childNodes as $childNode) {
                    if ($childNode->nodeName == 'link') {
                        $target = $this->resolve_entry_link($childNode);
                        if ($target !== null) {
                            $this->_article['link'] = $target;
                        }
                        continue;
                    }

                    $this->_article[$childNode->nodeName] = $childNode->nodeValue;
                    if ($childNode->nodeName == 'content') {
                        $this->_article['description'] = $childNode->nodeValue;
                    }
                }

                if (!isset($this->_article['link'])) {
                    echo 'ERROR: Not Stored - entry has no link' . PHP_EOL;
                    continue;
                }

                $this->get_article_og($this->_article['link']);
                $this->save_to_db('news');
            }
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    }

    /**
     * Extracts the article URL from a feed <link> element.
     *
     * The href is expected to be a redirect URL carrying the real target in
     * its `url` query parameter; a leading "http://m." host prefix is
     * rewritten to "http://".
     *
     * @param DOMNode $linkNode The <link> child of an entry.
     * @return string|null The target URL, or null when none can be derived.
     */
    private function resolve_entry_link($linkNode)
    {
        if (!($linkNode instanceof DOMElement)) {
            return null;
        }

        $query_str = parse_url($linkNode->getAttribute('href'), PHP_URL_QUERY);
        if (!is_string($query_str)) {
            return null;
        }

        parse_str($query_str, $query_params);
        if (!isset($query_params['url']) || !is_string($query_params['url']) || $query_params['url'] === '') {
            return null;
        }

        return str_replace('http://m.', 'http://', $query_params['url']);
    }

    /**
     * Downloads $url and collects its OpenGraph <meta> properties into
     * $_og_data (keys have ':' replaced by '_', e.g. "og_image").
     *
     * When the page cannot be fetched the error is echoed and the URL is
     * remembered in the `nog` table.
     *
     * @param string $url Page to inspect.
     * @return bool True when the page was fetched and parsed, false otherwise.
     */
    protected function get_article_og($url)
    {
        // Never reuse the metadata of a previously processed article.
        $this->_og_data = array();

        try {
            $html = @file_get_contents($url, false, $this->_context);
            if (!$html) {
                echo 'ERROR: OpenGraph = ' . $url . PHP_EOL;
                // Record the failing URL once.
                // NOTE(review): the lookup uses the `url` column because that
                // is the property stored on the bean - confirm against the schema.
                if (!R::find('nog', ' url = ?', array($url))) {
                    $nog = R::dispense('nog');
                    $nog->url = $url;
                    R::store($nog);
                }
                return false;
            }

            $doc = new DOMDocument();
            // Real-world HTML is rarely well-formed; silence the parser warnings.
            @$doc->loadHTML($html);
            $xpath = new DOMXPath($doc);
            $metas = $xpath->query('//*/meta[starts-with(@property, \'og:\')]');

            $rmetas = array();
            foreach ($metas as $meta) {
                $property = str_replace(':', '_', $meta->getAttribute('property'));
                $rmetas[$property] = $meta->getAttribute('content');
            }
            $this->_og_data = $rmetas;

            // Pause between article requests.
            sleep(1);
            return true;
        } catch (Exception $e) {
            echo $e->getMessage();
        }

        return false;
    }

    /**
     * Stores the current article unless one with the same link already exists.
     *
     * The OpenGraph image, when present, is downloaded to cache/<md5>.jpg and a
     * thumbnail is rendered with ImageMagick `convert` into cache/thumb/.
     *
     * NOTE(review): as in the original flow, the bean is only stored after a
     * fresh image download succeeded; articles without an image, or whose image
     * is already cached, are not stored. Confirm that this is intended.
     *
     * @param string $category Category label written to the article.
     */
    protected function save_to_db($category)
    {
        try {
            if (!isset($this->_article['link'])) {
                throw new Exception('ERROR: Not Stored - missing link' . PHP_EOL);
            }
            $link = $this->_article['link'];

            if (R::find('article', ' link = ?', array($link))) {
                throw new Exception('ERROR: Already Exists - ' . $link . PHP_EOL);
            }

            $article = R::dispense('article');
            $article->title = isset($this->_article['title']) ? $this->_article['title'] : null;
            $article->description = isset($this->_article['description']) ? $this->_article['description'] : null;
            $article->link = $link;

            date_default_timezone_set('America/Chicago');
            $time = isset($this->_article['published']) ? strtotime($this->_article['published']) : false;
            if ($time === false) {
                // A missing or unparsable date falls back to "now" rather than the epoch.
                $time = time();
            }
            $article->published = date('Y-m-d H:i:s', $time);
            $article->setMeta('cast.published', 'datetime');
            $article->category = $category;

            if (!isset($this->_og_data['og_image'])) {
                return;
            }

            $image_url = $this->_og_data['og_image'];
            $filename = md5($image_url) . ".jpg";
            $article->image_link = $image_url;
            $article->image_file = $filename;

            if (file_exists('cache/' . $filename)) {
                return;
            }

            $file = @file_get_contents($image_url);
            if (!$file) {
                return;
            }
            file_put_contents('cache/' . $filename, $file);

            // Escape both paths so the shell never interprets them.
            $source = 'cache' . DIRECTORY_SEPARATOR . $filename;
            $thumb = 'cache' . DIRECTORY_SEPARATOR . 'thumb' . DIRECTORY_SEPARATOR . $filename;
            exec('convert ' . escapeshellarg($source) . ' -background none -resize 480x360 -quality 65 -sharpen 2x4 ' . escapeshellarg($thumb));

            R::store($article);
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    }
}
/**
 * Feed parser used for the Google Alerts feeds listed by this script.
 *
 * Adds nothing of its own; all behavior is inherited from FeedParseBase.
 */
class GoogleFeed extends FeedParseBase
{
}
// Feed URLs to ingest, one per line, each labelled with its topic.
$feeds = array(
    // Cannabis
    'https://www.google.com/alerts/feeds/05402891751147148632/10666330494545493357',
);

// Constructing a GoogleFeed performs the whole run for that feed. The
// instance is released right away so its destructor runs before the next
// feed is started.
foreach ($feeds as $feed) {
    $google = new GoogleFeed($feed);
    unset($google);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment