Skip to content

Instantly share code, notes, and snippets.

@coodix
Created March 23, 2018 07:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save coodix/d0a83fe284600b9183553ba1e5452cba to your computer and use it in GitHub Desktop.
Save coodix/d0a83fe284600b9183553ba1e5452cba to your computer and use it in GitHub Desktop.
PersistenceCollectionCrawlQueue
<?php
/**
 * Crawl queue that periodically snapshots its state to disk so a crawl can be
 * resumed after the process restarts.
 *
 * The snapshot is a PHP-serialized two-element array [urls, pendingUrls]
 * stored at the path given by the QUEUE_FILE constant, which must be defined
 * before this class is instantiated.
 *
 * NOTE: writes are throttled (see flushQueueOnDisk()), so changes made after
 * the most recent write are not persisted until a later add() or
 * markAsProcessed() call falls outside the throttle window.
 */
class PersistenceCollectionCrawlQueue extends \Spatie\Crawler\CrawlQueue\CollectionCrawlQueue
{
    /** Minimum number of seconds that must elapse between two snapshots. */
    const FLUSH_INTERVAL_SECONDS = 60;

    /** @var int|null Unix timestamp of the last snapshot attempt, null if none yet. */
    protected $last_saved_time;

    /**
     * Write the current queue state to QUEUE_FILE, at most once per
     * FLUSH_INTERVAL_SECONDS.
     *
     * @return void
     */
    protected function flushQueueOnDisk()
    {
        $now = time();

        if ($this->last_saved_time && $now - $this->last_saved_time <= self::FLUSH_INTERVAL_SECONDS) {
            return;
        }

        // Record the attempt before writing so a failing write is not retried
        // on every single queue operation.
        $this->last_saved_time = $now;

        // LOCK_EX prevents a concurrent reader/writer from observing a
        // half-written snapshot.
        file_put_contents(
            QUEUE_FILE,
            serialize([$this->urls, $this->pendingUrls]),
            LOCK_EX
        );
    }

    /**
     * Restore the queue from QUEUE_FILE when a usable snapshot exists;
     * otherwise start with empty collections.
     */
    public function __construct()
    {
        $this->urls = collect();
        $this->pendingUrls = collect();

        // First run: no snapshot yet. Checking up front avoids the warning
        // file_get_contents() raises for a missing file.
        if (!is_file(QUEUE_FILE) || !is_readable(QUEUE_FILE)) {
            return;
        }

        $raw = file_get_contents(QUEUE_FILE);
        if (!$raw) {
            return;
        }

        // SECURITY NOTE(review): unserialize() instantiates arbitrary classes;
        // QUEUE_FILE must only ever contain data written by this class.
        $snapshot = unserialize($raw);

        // A truncated or corrupted snapshot makes unserialize() return false;
        // fall back to the empty queue instead of indexing into a non-array.
        if (!is_array($snapshot) || !isset($snapshot[0], $snapshot[1])) {
            return;
        }

        $this->urls = collect($snapshot[0]);
        $this->pendingUrls = collect($snapshot[1]);
    }

    /**
     * Add a URL via the parent implementation, then snapshot the queue
     * (subject to throttling).
     *
     * @param \Spatie\Crawler\CrawlUrl $url
     *
     * @return \Spatie\Crawler\CrawlQueue\CrawlQueue This queue, for chaining.
     */
    public function add(\Spatie\Crawler\CrawlUrl $url): \Spatie\Crawler\CrawlQueue\CrawlQueue
    {
        parent::add($url);
        $this->flushQueueOnDisk();

        return $this;
    }

    /**
     * Mark a URL as processed via the parent implementation, then snapshot
     * the queue (subject to throttling).
     *
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     */
    public function markAsProcessed(\Spatie\Crawler\CrawlUrl $crawlUrl)
    {
        parent::markAsProcessed($crawlUrl);
        $this->flushQueueOnDisk();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment