Created
November 24, 2017 15:05
-
-
Save mducharme/1fb301b955c2eba16471c30f7b43a5d9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Charcoal\Admin\Script\Tools; | |
use Exception; | |
use Psr\Http\Message\RequestInterface; | |
use Psr\Http\Message\ResponseInterface; | |
use Pimple\Container; | |
use Goutte\Client as GoutteClient; | |
use GuzzleHttp\Client as GuzzleClient; | |
use GuzzleHttp\TransferStats; | |
use Charcoal\Admin\AdminScript; | |
/**
 * Broken-link checker script.
 *
 * Starting from a URL, requests every link found in `<a href>` attributes and
 * prints its HTTP status code and transfer time. Links pointing to the same
 * host as the start URL are crawled recursively, up to `max-level` levels deep.
 */
class CheckLinksScript extends AdminScript
{
    /**
     * The URL the crawl starts from; always ends with a slash once set by run().
     *
     * @var string
     */
    private $startUrl;

    /**
     * The components of the start URL, as returned by parse_url().
     *
     * @var array
     */
    private $parsedStartUrl = [];

    /**
     * @var integer
     */
    private $maxLevel;

    /**
     * URLs that were already requested, stored as keys for constant-time lookup.
     *
     * @var boolean[]
     */
    private $processedUrls = [];

    /**
     * Pages whose links were already extracted, mapped to the shallowest level
     * at which they were crawled.
     *
     * @var integer[]
     */
    private $crawledUrls = [];

    /**
     * @var GuzzleClient
     */
    private $guzzleClient;

    /**
     * @var GoutteClient
     */
    private $goutteClient;

    /**
     * @param array|\ArrayAccess $data Script dependencies, forwarded to the parent constructor.
     */
    public function __construct($data = null)
    {
        parent::__construct($data);

        // Both clients share the same underlying Guzzle instance.
        $this->guzzleClient = new GuzzleClient();
        $this->goutteClient = new GoutteClient();
        $this->goutteClient->setClient($this->guzzleClient);
    }

    /**
     * @return array
     */
    public function defaultArguments()
    {
        $arguments = [
            'url' => [
                'longPrefix'   => 'url',
                'description'  => 'URL to start checking links from.',
                'defaultValue' => $this->baseUrl()
            ],
            // Not read by this script; kept so existing invocations that pass it keep working.
            'output-dir' => [
                'longPrefix'   => 'output-dir',
                'description'  => 'Output path (relative) where the static files will be stored.',
                'defaultValue' => 'www/static/'
            ],
            'max-level' => [
                'longPrefix'   => 'max-level',
                'description'  => 'Maximum recursive level.',
                'defaultValue' => 2
            ]
        ];

        return array_merge(parent::defaultArguments(), $arguments);
    }

    /**
     * Check the start URL, then every link reachable from it.
     *
     * @param  RequestInterface  $request  A PSR-7 compatible Request instance (unused).
     * @param  ResponseInterface $response A PSR-7 compatible Response instance.
     * @return ResponseInterface The response, unchanged.
     */
    public function run(RequestInterface $request, ResponseInterface $response)
    {
        unset($request);

        $climate = $this->climate();
        $climate->arguments->parse();

        $this->startUrl = rtrim($climate->arguments->get('url'), '/').'/';
        $this->maxLevel = (int)$climate->arguments->get('max-level');

        // isInternalLink() and absoluteLink() rely on the parsed start URL.
        $parsed = parse_url($this->startUrl);
        $this->parsedStartUrl = is_array($parsed) ? $parsed : [];

        $climate->underline()->out(
            sprintf('Check Broken Links ("%s")', $this->startUrl)
        );

        $this->checkUrl($this->startUrl);
        $this->retrieveLinks($this->startUrl, 0);

        return $response;
    }

    /**
     * Request a URL once and print its status.
     *
     * URLs that cannot be requested over HTTP (see absoluteLink()) and URLs
     * that were already requested are skipped.
     *
     * @param  string $url The (absolute or relative) URL to check.
     * @return void
     */
    private function checkUrl($url)
    {
        $url = $this->absoluteLink($url);
        if ($url === null || isset($this->processedUrls[$url])) {
            return;
        }
        $this->processedUrls[$url] = true;

        try {
            $this->guzzleClient->request('GET', $url, [
                // Status codes are reported from the transfer statistics; without this,
                // 4xx/5xx responses would also throw and be reported a second time.
                'http_errors' => false,
                'on_stats'    => function (TransferStats $stats) {
                    $this->reportStats($stats);
                }
            ]);
        } catch (Exception $e) {
            // With "http_errors" off, exceptions here are transport-level failures.
            $this->climate()->error($e->getMessage().' - '.$url);
        }
    }

    /**
     * Print one line for a completed transfer, styled by status code.
     *
     * @param  TransferStats $stats The statistics of the transfer.
     * @return void
     */
    private function reportStats(TransferStats $stats)
    {
        if (!$stats->hasResponse()) {
            return;
        }

        $code    = $stats->getResponse()->getStatusCode();
        $message = sprintf(
            '[%s] %s - %sms',
            $code,
            $stats->getEffectiveUri(),
            number_format(1000 * $stats->getTransferTime(), 0)
        );

        if ($code >= 400) {
            $this->climate()->error($message);
        } elseif ($code > 200) {
            $this->climate()->orange($message);
        } else {
            $this->climate()->out($message);
        }
    }

    /**
     * Check every link of a page and recurse into the internal ones.
     *
     * @param  string  $url   The URL to retrieve links from.
     * @param  integer $level The current level.
     * @return void
     */
    private function retrieveLinks($url, $level)
    {
        $url = $this->absoluteLink($url);
        if ($url === null) {
            return;
        }

        // A page is only crawled again when reached at a shallower level than before,
        // because only then can more of its descendants fall within the maximum level.
        if (isset($this->crawledUrls[$url]) && $this->crawledUrls[$url] <= $level) {
            return;
        }
        $this->crawledUrls[$url] = $level;

        try {
            $links = $this->goutteClient->request('GET', $url)->filter('a');
        } catch (Exception $e) {
            // A page that cannot be fetched or parsed must not abort the whole run.
            $this->climate()->error($e->getMessage().' - '.$url);
            return;
        }

        $links->each(function ($item) use ($level) {
            $href = $this->absoluteLink($item->attr('href'));
            if ($href === null) {
                return;
            }

            $this->checkUrl($href);

            // Pass "$level + 1" instead of incrementing: the captured level
            // must stay the same for every link of this page.
            if ($level < $this->maxLevel && $this->isInternalLink($href)) {
                $this->retrieveLinks($href, $level + 1);
            }
        });
    }

    /**
     * Whether a URL is relative or points to the same host as the start URL.
     *
     * @param  string $url The URL to test.
     * @return boolean
     */
    private function isInternalLink($url)
    {
        $parsed = parse_url($url);
        if ($parsed === false) {
            return false;
        }

        if (!isset($parsed['host'])) {
            return true;
        }

        if (!isset($this->parsedStartUrl['host'])) {
            return false;
        }

        // Host names are case-insensitive.
        return strcasecmp($parsed['host'], $this->parsedStartUrl['host']) === 0;
    }

    /**
     * Turn a link into an absolute HTTP(S) URL, without its fragment.
     *
     * NOTE: document-relative links ("page.html", "../page.html") are resolved
     * against the start URL, not against the page they were found on.
     *
     * @param  string|null $url The link, as found in a "href" attribute.
     * @return string|null The absolute URL, or null if the link is empty, is only
     *     a fragment, or uses a scheme other than HTTP(S) ("mailto:", "tel:", ...).
     */
    private function absoluteLink($url)
    {
        $url = trim((string)$url);

        // The fragment is never sent to the server; dropping it avoids duplicate checks.
        $fragmentPos = strpos($url, '#');
        if ($fragmentPos !== false) {
            $url = substr($url, 0, $fragmentPos);
        }

        if ($url === '') {
            return null;
        }

        // Protocol-relative link: reuse the scheme of the start URL.
        if (strpos($url, '//') === 0) {
            $scheme = isset($this->parsedStartUrl['scheme']) ? $this->parsedStartUrl['scheme'] : 'http';

            return $scheme.':'.$url;
        }

        // Link with an explicit scheme: only HTTP(S) can be requested.
        if (preg_match('#^([a-z][a-z0-9+.\-]*):#i', $url, $matches)) {
            $scheme = strtolower($matches[1]);

            return ($scheme === 'http' || $scheme === 'https') ? $url : null;
        }

        // Root-relative link: resolve against the origin, not the start URL's path.
        if ($url[0] === '/') {
            return $this->startOrigin().$url;
        }

        return $this->startUrl.$url;
    }

    /**
     * The "scheme://host[:port]" part of the start URL, without a trailing slash.
     *
     * @return string
     */
    private function startOrigin()
    {
        $parts = $this->parsedStartUrl;
        if (!isset($parts['scheme'], $parts['host'])) {
            // Start URL could not be parsed; fall back to prefixing it as-is.
            return rtrim((string)$this->startUrl, '/');
        }

        $origin = $parts['scheme'].'://'.$parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':'.$parts['port'];
        }

        return $origin;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment