Skip to content

Instantly share code, notes, and snippets.

@taisang1996
Created April 18, 2017 04:13
Show Gist options
  • Save taisang1996/490d197b09e3854cfa41de02eb3aaf66 to your computer and use it in GitHub Desktop.
Save taisang1996/490d197b09e3854cfa41de02eb3aaf66 to your computer and use it in GitHub Desktop.
Bóc tách dữ liệu zing
<?php
/*
* How to install and run:
*
* $ composer require fabpot/goutte
* $ php index.php
*
* Further reading:
* - https://github.com/FriendsOfPHP/Goutte
* - http://symfony.com/doc/current/components/dom_crawler.html
*/
require_once __DIR__ . '/vendor/autoload.php';
use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;
// Fetch the article page over HTTP.
$client = new Client();
$crawler = $client->request(
    'GET',
    "http://news.zing.vn/iphone-xach-tay-van-ban-tot-sau-hang-loat-thong-tin-bat-loi-post738556.html"
);

// Pull the article fields out of the page and dump them to standard output.
$parser = new ParseContent($crawler);
$article = $parser->parse();
print_r($article);
/**
 * Extracts the fields of an article page (title, summary, body, author, tags)
 * from a crawler, using CSS selectors rooted at "#page-article".
 *
 * The crawler is expected to behave like Symfony's DomCrawler: filter() returns
 * a node set offering text(), html() and each(). When a selector matches
 * nothing, whatever the crawler throws propagates to the caller unchanged
 * (Symfony's DomCrawler raises \InvalidArgumentException for text()/html() on
 * an empty node set).
 */
class ParseContent
{
    /** @var object Crawler over the article page. */
    private $_crawler;

    /** @var array Fields collected by the most recent parse() call. */
    private $_result = [];

    /**
     * Stores the crawler. No parsing happens here: a constructor's return
     * value is discarded by `new`, so parsing eagerly would only make every
     * caller pay for the work twice.
     *
     * @param object $crawler Crawler over the article page.
     */
    public function __construct($crawler)
    {
        $this->_crawler = $crawler;
    }

    /**
     * Extracts every supported field from the page.
     *
     * @return array Map with the keys 'title', 'summary', 'body', 'author'
     *               (strings) and 'tags' (list of strings), in that order.
     */
    public function parse()
    {
        $this->_result = [
            'title'   => $this->parseTitle(),
            'summary' => $this->parseSummary(),
            'body'    => $this->parseBody(),
            'author'  => $this->parseAuthor(),
            'tags'    => $this->parseTags(),
        ];

        return $this->_result;
    }

    /**
     * @return string Trimmed text of the article headline.
     */
    public function parseTitle()
    {
        return $this->_trimmedText('#page-article h1');
    }

    /**
     * @return string Trimmed text of the article summary.
     */
    public function parseSummary()
    {
        return $this->_trimmedText('#page-article .the-article-summary');
    }

    /**
     * @return string Inner HTML of the article body, with surrounding
     *                whitespace removed.
     */
    public function parseBody()
    {
        $html = $this->_crawler->filter('#page-article .the-article-body')->html();

        // Return the trimmed markup; the untrimmed value used to leak out here.
        return trim($html);
    }

    /**
     * @return string Trimmed text of the author element.
     */
    public function parseAuthor()
    {
        return $this->_trimmedText('#page-article .author');
    }

    /**
     * @return array Trimmed text of each direct child of the tags container,
     *               in document order.
     */
    public function parseTags()
    {
        return $this->_crawler
            ->filter('#page-article .the-article-tags > *')
            ->each(function (Crawler $node) {
                return trim($node->text());
            });
    }

    /**
     * Returns the trimmed text of the node set matched by a CSS selector.
     *
     * @param string $selector CSS selector passed to the crawler's filter().
     *
     * @return string
     */
    private function _trimmedText($selector)
    {
        return trim($this->_crawler->filter($selector)->text());
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment