Last active
August 29, 2015 14:26
-
-
Save huichops/4b1dae808c1bd5b7a208 to your computer and use it in GitHub Desktop.
Small script that parses the SIIAU class schedule list into an associative array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Script para sacar los datos de la oferta académica del SIIAU en PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"require": { | |
"fabpot/goutte": "~3.1" | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require __DIR__ . '/vendor/autoload.php'; | |
use Symfony\Component\DomCrawler\Crawler; | |
use Goutte\Client; | |
/**
 * Scraper for the SIIAU class schedule listing.
 *
 * Submits the public query form found at self::URL and converts every
 * row of the resulting HTML table into an associative array keyed by
 * self::KEYS. The parsed rows are stored in $entries.
 */
class Siiau
{
    // The URL and the keys of the parsed data won't change
    const URL = 'http://consulta.siiau.udg.mx/wco/sspseca.forma_consulta';
    const SCHEDULE_KEYS = ['time', 'days', 'building', 'classroom'];
    const TIME_KEYS = ['start', 'finish'];
    const KEYS = ['nrc', 'key', 'topic', 'section', 'credits',
                  'limit', 'available', 'schedule', 'professor'];

    // Neither will the offsets used to slice the rows and the schedule cells
    const ENTRY_OFFSET = 3;
    const SCHEDULE_OFFSET = 4;

    /** @var Client HTTP client used to fetch and submit the query form */
    private $client;

    /** @var array List of parsed entries, filled by parse() */
    public $entries;

    public function __construct()
    {
        $this->client = new Client();
        $this->entries = [];
    }

    /**
     * Fetches the query form, submits it and parses the resulting
     * table into $this->entries.
     *
     * Rows whose number of cells does not match self::KEYS are skipped
     * instead of being handed to array_combine(), which would fail on them.
     *
     * @return array The parsed entries (the same value stored in $this->entries)
     */
    public function parse()
    {
        $submitText = 'Consultar';
        $selector = 'body > style + table > tr';

        $form = $this
            ->client
            ->request('POST', self::URL)
            ->selectButton($submitText)
            ->form();
        $crawler = $this->client->submit($form);

        $rows = $crawler
            ->filter($selector)
            ->slice(self::ENTRY_OFFSET);

        $parseCell = $this->parseEntry();
        $expected = count(self::KEYS);

        $parsed = $rows->each(function (Crawler $entry) use ($parseCell, $expected) {
            // The first cell of every row is skipped; the rest map onto KEYS
            $values = $entry
                ->children()
                ->slice(1)
                ->each($parseCell);

            // array_combine() requires both arrays to have the same size
            if (count($values) !== $expected) {
                return null;
            }

            return array_combine(self::KEYS, $values);
        });

        // Drop the skipped rows and reindex the list
        $this->entries = array_values(array_filter($parsed, 'is_array'));

        return $this->entries;
    }

    /**
     * Returns the callback that converts one cell of an entry row
     * into its final value.
     *
     * The callback receives the cell and its position within the row.
     * The 'schedule' cell becomes a list of associative arrays; every
     * other cell becomes a whitespace-normalized string.
     *
     * @return \Closure
     */
    private function parseEntry()
    {
        $keys = self::KEYS;

        return function (Crawler $value, $i) use ($keys) {
            $key = isset($keys[$i]) ? $keys[$i] : null;

            // Schedules are already structured data. They must not go
            // through preg_replace(), which would cast every nested
            // array to the string "Array".
            if ($key === 'schedule') {
                return $this->parseSchedule($value);
            }

            if ($key === 'professor') {
                $value = $this->parseProfessor($value);
            }

            return $this->normalizeText($value);
        };
    }

    /**
     * Collapses every run of whitespace in the node text into a single space.
     *
     * @param Crawler $node
     * @return string Empty string when the node list is empty
     */
    private function normalizeText(Crawler $node)
    {
        // Crawler::text() throws when the node list is empty
        $text = count($node) > 0 ? $node->text() : '';

        return preg_replace('/\s+/', ' ', $text);
    }

    /**
     * Puts the schedule properties of an entry in a list of
     * associative arrays keyed by self::SCHEDULE_KEYS.
     *
     * @param Crawler $schedules The cell that contains the nested schedule table
     * @return array One associative array per well-formed schedule row
     */
    private function parseSchedule($schedules)
    {
        $selector = 'table > tr';
        $parseCell = $this->parseScheduleCell();
        $expected = count(self::SCHEDULE_KEYS);

        $rows = $schedules
            ->filter($selector)
            ->each(function (Crawler $row) use ($parseCell, $expected) {
                $cells = $row
                    ->children()
                    ->slice(1, self::SCHEDULE_OFFSET)
                    ->each($parseCell);

                // Skip rows that cannot be mapped onto SCHEDULE_KEYS
                if (count($cells) !== $expected) {
                    return null;
                }

                return array_combine(self::SCHEDULE_KEYS, $cells);
            });

        return array_values(array_filter($rows, 'is_array'));
    }

    /**
     * Returns the callback that converts one cell of a schedule row.
     *
     * 'days' becomes a list of single characters with dots and
     * whitespace removed; 'time' becomes an array keyed by
     * self::TIME_KEYS; any other cell keeps its raw text.
     *
     * @return \Closure
     */
    private function parseScheduleCell()
    {
        $keys = self::SCHEDULE_KEYS;
        $timeParts = count(self::TIME_KEYS);

        return function (Crawler $cell, $i) use ($keys, $timeParts) {
            $key = isset($keys[$i]) ? $keys[$i] : null;
            $value = $cell->text();

            if ($key === 'days') {
                return str_split(preg_replace('/([\.\s])+/', '', $value));
            }

            if ($key === 'time') {
                // Always produce exactly as many parts as TIME_KEYS so
                // array_combine() cannot fail on a malformed time range;
                // missing parts are null.
                $parts = array_pad(explode('-', $value, $timeParts), $timeParts, null);

                return array_combine(self::TIME_KEYS, $parts);
            }

            return $value;
        };
    }

    /**
     * Narrows the professor cell down to its last td element, which is
     * the one that holds the professor name.
     *
     * @param Crawler $professor
     * @return Crawler Possibly empty when no td:last-child matches
     */
    private function parseProfessor($professor)
    {
        $selector = 'td:last-child';

        return $professor->filter($selector);
    }
}
// Build the scraper, run it against the SIIAU site and dump every parsed entry
$scraper = new Siiau();
$scraper->parse();
var_dump($scraper->entries);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment