Skip to content

Instantly share code, notes, and snippets.

@doganoo
Created June 3, 2019 22:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save doganoo/25d01c06cdb3a5a5fb65704a361c2d9f to your computer and use it in GitHub Desktop.
Save doganoo/25d01c06cdb3a5a5fb65704a361c2d9f to your computer and use it in GitHub Desktop.
<?php
use doganoo\PHPAlgorithms\Datastructure\Sets\HashSet;
use doganoo\PHPAlgorithms\Datastructure\Stackqueue\Queue;
require_once 'vendor/autoload.php';
/*
* COMPOSER.JSON CONTENT:
* ==================================================
*
* {
* "require": {
* "doganoo/php-algorithms": "*"
* }
* }
* ==================================================
*/
/**
 * Resolves a link found on a page against the URL of that page.
 *
 * Fragments are dropped because they address a position inside a document,
 * not a different document. Only http/https targets are returned; everything
 * else (mailto:, tel:, javascript:, data:, empty hrefs, pure "#anchor" links)
 * yields an empty string so the caller can skip it.
 *
 * NOTE(review): a <base href> element in the page and HTTP redirects are not
 * taken into account; $base is always the URL that was requested.
 *
 * @param string $base absolute URL of the page the link was found on
 * @param string $href raw value of the anchor's href attribute
 *
 * @return string absolute http(s) URL, or '' when the link is not crawlable
 */
function crawlerResolveUrl(string $base, string $href): string
{
    $href = trim($href);

    $fragmentPos = strpos($href, '#');
    if (false !== $fragmentPos) {
        $href = substr($href, 0, $fragmentPos);
    }
    if ('' === $href) {
        return '';
    }

    // A leading "scheme:" means the reference is already absolute.
    if (1 === preg_match('#^([a-z][a-z0-9+.\-]*):#i', $href, $match)) {
        $scheme = strtolower($match[1]);

        return ('http' === $scheme || 'https' === $scheme) ? $href : '';
    }

    $parts = parse_url($base);
    if (false === $parts || !isset($parts['scheme'], $parts['host'])) {
        return '';
    }

    // Protocol-relative reference ("//host/path"): inherit only the scheme.
    if (0 === strpos($href, '//')) {
        return $parts['scheme'] . ':' . $href;
    }

    $authority = $parts['scheme'] . '://' . $parts['host'];
    if (isset($parts['port'])) {
        $authority .= ':' . $parts['port'];
    }
    $basePath = isset($parts['path']) ? $parts['path'] : '/';

    // Query-only reference: same document path, different query string.
    if ('?' === $href[0]) {
        return $authority . $basePath . $href;
    }

    if ('/' === $href[0]) {
        // Root-relative reference.
        $path = $href;
    } else {
        // Path-relative reference: resolve against the directory of the base page.
        $path = substr($basePath, 0, (int) strrpos($basePath, '/') + 1) . $href;
    }

    // Keep the query string out of the dot-segment normalisation.
    $query = '';
    $queryPos = strpos($path, '?');
    if (false !== $queryPos) {
        $query = substr($path, $queryPos);
        $path = substr($path, 0, $queryPos);
    }

    // Collapse "." and ".." segments. $resolved[0] is the empty segment in
    // front of the leading slash and is never popped, so ".." cannot climb
    // above the document root.
    $segments = explode('/', $path);
    $lastIndex = count($segments) - 1;
    $resolved = [];
    foreach ($segments as $index => $segment) {
        if ('.' === $segment || '..' === $segment) {
            if ('..' === $segment && count($resolved) > 1) {
                array_pop($resolved);
            }
            if ($index === $lastIndex) {
                // A trailing "." or ".." refers to a directory: keep the trailing slash.
                $resolved[] = '';
            }
            continue;
        }
        $resolved[] = $segment;
    }

    return $authority . implode('/', $resolved) . $query;
}

/**
 * Tells whether a URL belongs to the crawled site.
 *
 * The decision is made on the host component only, so a foreign URL that
 * merely mentions the domain in its path or query string is not mistaken
 * for an internal page. Sub-domains of $domainShort count as internal.
 *
 * @param string $url         absolute URL to classify
 * @param string $domainShort bare domain name of the crawled site, e.g. "domainname.de"
 *
 * @return bool true when the host equals $domainShort or is a sub-domain of it
 */
function crawlerIsInternalUrl(string $url, string $domainShort): bool
{
    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || '' === $host) {
        return false;
    }

    $host = strtolower($host);
    $domainShort = strtolower($domainShort);
    if ($host === $domainShort) {
        return true;
    }

    $suffix = '.' . $domainShort;

    return substr($host, -strlen($suffix)) === $suffix;
}

// Start URL of the crawl and the bare domain used to tell internal from external links.
$domain = "http://www.domainname.de";
$domainShort = "domainname.de";

// Work list of pages still to fetch, plus the three result sets of the report.
$queue = new Queue();
$internal = new HashSet();
$external = new HashSet();
$error = new HashSet();

$queue->enqueue($domain);
$internal->add($domain);

// Breadth-first crawl: every internal page is fetched once, every link on it
// is resolved to an absolute URL and then filed as internal or external.
while (false === $queue->isEmpty()) {
    $url = $queue->dequeue();
    $document = new DOMDocument();

    // Real-world HTML is rarely valid; the parser warnings are suppressed on
    // purpose and failure is detected through the return value instead.
    $loaded = @$document->loadHTMLFile($url);
    if (false === $loaded) {
        $error->add($url);
        continue;
    }

    foreach ($document->getElementsByTagName("a") as $aTag) {
        $href = crawlerResolveUrl($url, $aTag->getAttribute('href'));
        if ('' === $href) {
            // Not an http(s) target (anchor, mailto:, javascript:, ...): nothing to record.
            continue;
        }
        if ($internal->contains($href) || $external->contains($href)) {
            continue;
        }

        if (crawlerIsInternalUrl($href, $domainShort)) {
            $queue->enqueue($href);
            $internal->add($href);
        } else {
            $external->add($href);
        }
    }
}
// Write the crawl report: one section per result set, one link per line.
// The whole report is assembled in memory and written with a single call
// instead of re-opening the file twice for every link.
$sections = [
    "--------------------------------- INTERNE LINKS ---------------------------------\n" => $internal,
    "--------------------------------- EXTERNE LINKS ---------------------------------\n" => $external,
    "--------------------------------- FEHLERHAFTE LINKS ---------------------------------\n" => $error,
];

$report = '';
foreach ($sections as $heading => $set) {
    $report .= $heading;
    foreach ($set->toArray() as $element) {
        $report .= $element . "\n";
    }
}

// NOTE(review): FILE_APPEND is kept from the original, so running the script
// again adds a second report below the first one - confirm this is intended.
if (false === file_put_contents("export.txt", $report, FILE_APPEND)) {
    // Do not lose the result of a long crawl silently.
    error_log('Could not write the crawl report to export.txt');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment