-
-
Save ilyasozkurt/7d26423e5889c7b899860ed66fede63f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Console\Commands; | |
use App\Models\Earthquake; | |
use Carbon\Carbon; | |
use Carbon\CarbonTimeZone; | |
use Illuminate\Console\Command; | |
use Illuminate\Support\Facades\Http; | |
use PHPHtmlParser\Dom; | |
use PHPHtmlParser\Options; | |
class ScrapeEarthquakes extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'scrape:earthquakes';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This command tries to scrape earthquakes from http://www.koeri.boun.edu.tr/scripts/lasteq.asp';

    /**
     * Header/separator rows of the listing that must not be treated as data.
     *
     * The constructor strips all spaces from these entries, so they are
     * compared against space-stripped rows and the exact column padding
     * used here does not matter.
     *
     * @var string[]
     */
    private $excluded = [
        'RECENT EARTHQUAKES IN TURKEY',
        'KOERI REGIONAL EARTHQUAKE-TSUNAMI MONITORING CENTER',
        '(QUICK EPICENTER DETERMINATIONS)',
        ' Magnitude',
        'Date Time Latit(N) Long(E) Depth(km) MD ML Mw Region',
        '---------- -------- -------- ------- ---------- ------------ -----------',
    ];

    /**
     * Create a new command instance.
     *
     * Normalises the excluded rows once up front (all spaces removed) so
     * every incoming row can be compared with a plain strict lookup.
     *
     * @return void
     */
    public function __construct()
    {
        foreach ($this->excluded as $index => $row) {
            $this->excluded[$index] = $this->clearSpaces($row);
        }

        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Downloads the listing page, extracts the first <pre> element and
     * feeds each of its lines to processEarthquake().
     *
     * @return int 0 when the listing was processed, 1 when the page could
     *             not be fetched or contained no <pre> data.
     */
    public function handle()
    {
        $response = Http::get('http://www.koeri.boun.edu.tr/scripts/lasteq.asp');

        if (!$response->successful()) {
            $this->error('Could not fetch the earthquake list (HTTP ' . $response->status() . ').');

            return 1;
        }

        // Keep the markup untouched: the listing lives in a <pre> block
        // whose line breaks and column padding carry the row structure.
        $parser = new Dom();
        $parser->setOptions(
            (new Options())
                ->setWhitespaceTextNode(false)
                ->setCleanupInput(false)
                ->setPreserveLineBreaks(false)
                ->setRemoveDoubleSpace(false)
        );
        $parser->loadStr($response->body());

        $rawData = $parser->find('pre')[0]->innerHtml ?? false;

        if (!$rawData) {
            $this->error('No <pre> block with earthquake data was found in the response.');

            return 1;
        }

        // The line ending is dictated by the remote server, not by the
        // platform this command runs on, so split on any newline sequence
        // instead of PHP_EOL.
        $rows = preg_split('/\R/', $rawData) ?: [];

        $this->withProgressBar($rows, function ($row) {
            $this->processEarthquake($row);
        });

        return 0;
    }

    /**
     * Parse a single listing row and store it unless it is already known.
     *
     * Rows that are empty, listed in $excluded, or that do not look like a
     * data row (fewer than eight columns, or a first column that is not a
     * "Y.m.d H:i:s" timestamp) are skipped silently.
     *
     * @param string $row One raw line taken from the <pre> block.
     * @return void
     */
    protected function processEarthquake($row)
    {
        $compactRow = $this->clearSpaces($row);

        if ($compactRow === '' || in_array($compactRow, $this->excluded, true)) {
            return;
        }

        // Columns are separated by runs of two or more spaces; turning
        // those runs into '#' lets a plain explode() split the row while
        // single spaces inside a column (date/time, region) stay intact.
        $earthquakeData = [];
        foreach (explode('#', $this->convertDoubleSpaces($row)) as $part) {
            $part = trim($part);

            // Compare against '' rather than using empty(), which would
            // also discard a legitimate "0" column.
            if ($part !== '') {
                $earthquakeData[] = $part;
            }
        }

        // Indexes 0..7 are read below; anything shorter, or not starting
        // with a timestamp, is not a data row and is skipped here rather
        // than being handed to the date parser.
        if (count($earthquakeData) < 8
            || preg_match('/^\d{4}\.\d{2}\.\d{2} \d{2}:\d{2}:\d{2}$/', $earthquakeData[0]) !== 1
        ) {
            return;
        }

        // The hash covers every parsed column and is the de-duplication key.
        $earthquakeHash = hash('crc32', implode(',', $earthquakeData));

        if (Earthquake::where('hash', $earthquakeHash)->exists()) {
            return;
        }

        // Timestamps are interpreted in the Europe/Istanbul timezone.
        $happenedAt = Carbon::createFromFormat(
            'Y.m.d H:i:s',
            $earthquakeData[0],
            new CarbonTimeZone('Europe/Istanbul')
        );

        Earthquake::create([
            'latitude' => $earthquakeData[1],
            'longitude' => $earthquakeData[2],
            'depth' => $earthquakeData[3],
            // '-.-' is the listing's placeholder for "no value".
            'md' => ($earthquakeData[4] !== '-.-' ? $earthquakeData[4] : null),
            // NOTE(review): ml is stored as-is, placeholder included; confirm
            // whether the column is nullable before mapping '-.-' to null here.
            'ml' => $earthquakeData[5],
            'mw' => ($earthquakeData[6] !== '-.-' ? $earthquakeData[6] : null),
            'region' => $earthquakeData[7],
            'happened_at' => $happenedAt,
            'hash' => $earthquakeHash,
        ]);
    }

    /**
     * Remove every space from the text after trimming its ends.
     *
     * @param $text string The text which we want to clear spaces
     * @return string
     */
    protected function clearSpaces($text)
    {
        return str_replace(' ', '', trim($text));
    }

    /**
     * Replace each pair of consecutive spaces with '##' after trimming.
     *
     * Single spaces are left untouched so that multi-word columns survive
     * a later split on '#'.
     *
     * @param $text string The text which we want to clear
     * @return string
     */
    protected function convertDoubleSpaces($text)
    {
        return str_replace('  ', '##', trim($text));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment