Skip to content

Instantly share code, notes, and snippets.

@drjamesj
Created May 23, 2021 11:56
Show Gist options
  • Save drjamesj/22c6ef91945e35bbd3d8f9a4557955d8 to your computer and use it in GitHub Desktop.
Save drjamesj/22c6ef91945e35bbd3d8f9a4557955d8 to your computer and use it in GitHub Desktop.
Image Downloader
<?php
require 'vendor/autoload.php';
use Goutte\Client;
/**
 * Downloads every image referenced by an <img> tag on a web page into a
 * local directory.
 *
 * Files are stored under their URL basename, so two images that share a
 * basename overwrite each other.
 */
class ImageDownloader
{
    /** Directory (relative to the current working directory) that receives the files. */
    private const OUTPUT_DIR = 'output';

    /** @var string[] Absolute image URLs collected by the most recent crawl. */
    private $images = [];

    /**
     * Loads the page at $url and collects the absolute URL of every <img> src on it.
     *
     * @param string $url Page to scan.
     */
    private function crawl(string $url = ''): void
    {
        // Start from a clean slate so repeated run() calls do not re-download
        // images collected from an earlier page.
        $this->images = [];

        $client = new Client();
        $crawler = $client->request('GET', $url);

        // Resolve relative paths against the URI recorded in the client's
        // history rather than the raw $url argument (presumably the URL that
        // was finally loaded after redirects -- confirm against Goutte docs).
        $uri = $client->getHistory()->current()->getUri();

        $crawler->filter('img')->each(function ($node) use ($uri) {
            // attr() may yield null when the tag has no src attribute.
            $src = trim((string) $node->attr('src'));
            $resolved = self::resolveUrl($src, $uri);

            if ($resolved !== null) {
                $this->images[] = $resolved;
            }
        });
    }

    /**
     * Turns an <img> src value into an absolute http(s) URL.
     *
     * @param string $src     Raw src attribute value.
     * @param string $baseUri Absolute URI of the page the src was found on.
     *
     * @return string|null Absolute URL, or null when the src is empty, uses a
     *                     scheme other than http/https (e.g. data:), or the
     *                     base URI cannot be parsed.
     */
    private static function resolveUrl(string $src, string $baseUri): ?string
    {
        if ($src === '') {
            return null;
        }

        // Already absolute.
        if (preg_match('#^https?://#i', $src) === 1) {
            return $src;
        }

        // Any other explicit scheme (data:, blob:, javascript:, ...) is not a
        // file that can be fetched and stored here.
        if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $src) === 1) {
            return null;
        }

        $base = parse_url($baseUri);
        if ($base === false || !isset($base['scheme'], $base['host'])) {
            return null;
        }

        // Protocol-relative reference: only the scheme comes from the base.
        if (strncmp($src, '//', 2) === 0) {
            return $base['scheme'] . ':' . $src;
        }

        // Keep a non-default port, otherwise the image host would be wrong.
        $origin = $base['scheme'] . '://' . $base['host']
            . (isset($base['port']) ? ':' . $base['port'] : '');

        // Root-relative reference.
        if ($src[0] === '/') {
            return $origin . self::removeDotSegments($src);
        }

        // Document-relative reference: resolve against the *directory* of the
        // base path, not against the page file itself.
        $path = $base['path'] ?? '/';
        $lastSlash = strrpos($path, '/');
        $directory = $lastSlash === false ? '/' : substr($path, 0, $lastSlash + 1);

        return $origin . self::removeDotSegments($directory . $src);
    }

    /**
     * Collapses "." and ".." segments of an absolute URL path, leaving any
     * query string or fragment untouched.
     *
     * @param string $path Path beginning with "/", optionally followed by
     *                     "?query" and/or "#fragment".
     */
    private static function removeDotSegments(string $path): string
    {
        // Split off the query/fragment so dots inside it are never touched.
        $cut = strcspn($path, '?#');
        $suffix = (string) substr($path, $cut);
        $path = substr($path, 0, $cut);

        $segments = explode('/', (string) substr($path, 1));
        $last = count($segments) - 1;
        $resolved = [];

        foreach ($segments as $index => $segment) {
            if ($segment === '.' || $segment === '..') {
                if ($segment === '..') {
                    // Popping an empty stack is a no-op, so ".." can never
                    // climb above the root.
                    array_pop($resolved);
                }
                if ($index === $last) {
                    // A trailing dot segment denotes a directory: keep the slash.
                    $resolved[] = '';
                }
                continue;
            }
            $resolved[] = $segment;
        }

        return '/' . implode('/', $resolved) . $suffix;
    }

    /**
     * Derives the local file name for an image URL from the URL's path only,
     * so query strings and fragments do not end up in the name.
     *
     * @param string $imageUrl Absolute image URL.
     *
     * @return string A non-empty name without directory components.
     */
    private static function fileNameFor(string $imageUrl): string
    {
        $path = parse_url($imageUrl, PHP_URL_PATH);
        $name = is_string($path) ? basename($path) : '';

        // No usable basename (e.g. URL ends in "/"): fall back to a name that
        // is stable for the URL.
        if ($name === '' || $name === '.' || $name === '..') {
            return md5($imageUrl);
        }

        return $name;
    }

    /**
     * Crawls $url and saves every image found on it into the output directory.
     *
     * Images that fail to download are skipped; the remaining ones are still
     * processed.
     *
     * @param string $url Page whose images should be downloaded.
     *
     * @throws RuntimeException When the output directory cannot be created.
     */
    public function run(string $url = ''): void
    {
        $this->crawl($url);

        // The second is_dir() covers the race where another process creates
        // the directory between the first check and mkdir().
        if (!is_dir(self::OUTPUT_DIR)
            && !mkdir(self::OUTPUT_DIR, 0777, true)
            && !is_dir(self::OUTPUT_DIR)
        ) {
            throw new RuntimeException('Unable to create directory "' . self::OUTPUT_DIR . '"');
        }

        // The same image is often referenced several times on one page.
        foreach (array_unique($this->images) as $image) {
            $contents = file_get_contents($image);

            // Skip failed downloads instead of writing an empty file.
            if ($contents === false) {
                continue;
            }

            file_put_contents(self::OUTPUT_DIR . '/' . self::fileNameFor($image), $contents);
        }
    }
}
// Script entry point: download the images found on the Google home page.
(new ImageDownloader())->run('https://www.google.com');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment