Skip to content

Instantly share code, notes, and snippets.

@imzhi
Last active August 28, 2021 07:24
Show Gist options
  • Save imzhi/ae547a504e8344e6f2333213eddec97e to your computer and use it in GitHub Desktop.
Save imzhi/ae547a504e8344e6f2333213eddec97e to your computer and use it in GitHub Desktop.
抓取豆瓣电影中韩国的最新的120条名称与LOGO
<?php
require 'vendor/autoload.php';
use Facebook\WebDriver\WebDriverBy;
use Facebook\WebDriver\WebDriverPlatform;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\Remote\DesiredCapabilities;
use Facebook\WebDriver\WebDriverExpectedCondition;
use Facebook\WebDriver\Remote\WebDriverCapabilityType;
// Address of the WebDriver server the script connects to.
$host = 'http://localhost:4444';

// Command-line switches handed to Chrome when the session starts.
$chromeArguments = [
    '--window-size=1400,900',
    '--no-sandbox',
];
$options = new ChromeOptions();
$options->addArguments($chromeArguments);

// Attach the Chrome options to the capabilities requested from the server.
$capabilities = DesiredCapabilities::chrome();
$capabilities->setCapability(ChromeOptions::CAPABILITY, $options);

// Open the remote browser session; $driver is used by the rest of the script.
$driver = RemoteWebDriver::create($host, $capabilities);

// Element lookups keep retrying for up to 10 seconds before giving up.
$timeouts = $driver->manage()->timeouts();
$timeouts->implicitlyWait(10);
// Scrape the listing and download every poster image.
//
// The whole run sits inside try/finally so the remote browser session is always
// closed with quit(), even when a wait times out or an element lookup throws.
try {
    // Tag listing page; the "tags" parameter is the URL-encoded form of "韩国".
    $driver->get('https://movie.douban.com/tag/#/?sort=R&range=0,10&tags=%E9%9F%A9%E5%9B%BD');

    // Click the "more" link five times. After the i-th click, poll (up to 20 s,
    // every 1000 ms) until the list holds more than i * 20 items.
    // NOTE(review): presumably each click appends 20 items, giving 120 in total - confirm.
    for ($i = 1; $i <= 5; $i++) {
        sleep(1);
        $driver->findElement(WebDriverBy::cssSelector('a.more'))->click();
        $driver->wait(20, 1000)->until(
            function () use ($driver, $i) {
                $count = count($driver->findElements(WebDriverBy::cssSelector('#app .list-wp .item')));
                echo sprintf("count: %d, num: %d\n", $count, $i * 20);
                return $count > $i * 20;
            }
        );
    }

    // Images are stored in a "douban" directory next to this script.
    $dir = __DIR__ . '/douban';
    if (!is_dir($dir) && !mkdir($dir, 0755) && !is_dir($dir)) {
        throw new RuntimeException(sprintf('Unable to create directory "%s"', $dir));
    }

    $item_elements = $driver->findElements(WebDriverBy::cssSelector('#app .list-wp .item'));
    foreach ($item_elements as $item) {
        // Look the image element up once and read both attributes from it.
        $image = $item->findElement(WebDriverBy::cssSelector('.pic img'));

        // The alt text becomes the file name, so strip characters that are not
        // allowed in file names. The cast guards against a missing attribute.
        $alt = preg_replace('/[\/\\\\:*?"<>|]/', '', (string) $image->getAttribute('alt'));
        $src = (string) $image->getAttribute('src');

        // Take the extension from the URL path only, so a query string or
        // fragment on the image URL never ends up in the file name.
        $path = parse_url($src, PHP_URL_PATH);
        $ext = pathinfo(is_string($path) ? $path : $src, PATHINFO_EXTENSION);

        // Report a failed download instead of silently writing an empty file.
        $data = $src === '' ? false : file_get_contents($src);
        if ($data === false) {
            echo sprintf("download failed, alt: %s, src: %s\n", $alt, $src);
        } elseif (file_put_contents($dir . '/' . $alt . '.' . $ext, $data) === false) {
            echo sprintf("save failed, alt: %s, src: %s\n", $alt, $src);
        } else {
            echo sprintf("alt: %s, src: %s\n", $alt, $src);
        }

        // NOTE(review): usleep() takes microseconds, so this pauses for only 50 µs;
        // if a 50 ms delay between downloads was intended, use usleep(50000) - confirm.
        usleep(50);
    }

    echo "complete\n";
} finally {
    // Always end the WebDriver session so no browser process is left running.
    $driver->quit();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment