Skip to content

Instantly share code, notes, and snippets.

@huichops
Last active August 29, 2015 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save huichops/4b1dae808c1bd5b7a208 to your computer and use it in GitHub Desktop.
Save huichops/4b1dae808c1bd5b7a208 to your computer and use it in GitHub Desktop.
Small script that parses the SIIAU class-schedule listing into an associative array
Script para sacar los datos de la oferta académica del SIIAU en PHP
{
"require": {
"fabpot/goutte": "~3.1"
}
}
<?php
require __DIR__ . '/vendor/autoload.php';
use Symfony\Component\DomCrawler\Crawler;
use Goutte\Client;
/**
 * Scraper for the SIIAU class-offer listing.
 *
 * Submits the query form found at self::URL and converts every row of the
 * result table into an associative array keyed by self::KEYS. The parsed
 * rows are stored in the public $entries property and also returned by
 * parse().
 */
class Siiau
{
    // The URL and the keys used to label the data do not change
    const URL = 'http://consulta.siiau.udg.mx/wco/sspseca.forma_consulta';
    const SCHEDULE_KEYS = ['time', 'days', 'building', 'classroom'];
    const TIME_KEYS = ['start', 'finish'];
    const KEYS = ['nrc', 'key', 'topic', 'section', 'credits',
        'limit', 'available', 'schedule', 'professor'];

    // Neither do the positions of the rows/cells:
    // ENTRY_OFFSET is the number of leading result rows that are skipped,
    // SCHEDULE_OFFSET is the number of cells read from each schedule row.
    const ENTRY_OFFSET = 3;
    const SCHEDULE_OFFSET = 4;

    /** @var Client HTTP client used to request and submit the form */
    private $client;

    /** @var array Entries produced by the last call to parse() */
    public $entries;

    public function __construct()
    {
        $this->client = new Client();
        $this->entries = [];
    }

    /**
     * Parse the input data from the SIIAU
     * html response
     *
     * Requests the form page, submits it through its "Consultar" button
     * and turns each result row into an array keyed by self::KEYS.
     * Rows that do not provide one cell per key are skipped instead of
     * making array_combine() fail.
     *
     * @return array The parsed entries (also stored in $this->entries)
     */
    public function parse()
    {
        $submitText = 'Consultar';
        $selector = 'body > style + table > tr';

        $form = $this
            ->client
            ->request('POST', self::URL)
            ->selectButton($submitText)
            ->form();
        $crawler = $this->client->submit($form);

        $rows = $crawler
            ->filter($selector)
            ->slice(self::ENTRY_OFFSET);

        // Build the cell parser once instead of once per row
        $parseCell = $this->parseEntry();
        $expected = count(self::KEYS);

        $parsed = $rows->each(function (Crawler $entry, $i) use ($parseCell, $expected) {
            // The first cell is not part of the data; read at most one
            // cell per key so extra trailing cells cannot break the row
            $values = $entry
                ->children()
                ->slice(1, $expected)
                ->each($parseCell);

            if (count($values) !== $expected) {
                return null;
            }

            return array_combine(self::KEYS, $values);
        });

        // Drop the rows that were rejected above and reindex the list
        $this->entries = array_values(array_filter($parsed, 'is_array'));

        return $this->entries;
    }

    /**
     * Returns the function used to properly
     * set the values of each entry in an
     * associative array
     *
     * The returned closure receives a cell and its index within the row
     * and returns the cell text with whitespace runs collapsed, except for
     * the 'schedule' cell, which is returned as the nested array built by
     * parseSchedule().
     *
     * @return \Closure
     */
    private function parseEntry()
    {
        return function (Crawler $value, $i) {
            $keys = self::KEYS;
            $key = isset($keys[$i]) ? $keys[$i] : null;

            if ($key === 'schedule') {
                // The schedule is a nested array: normalise its string
                // leaves one by one. Passing the nested array straight to
                // preg_replace() would cast every row to the string "Array".
                return $this->normalizeWhitespace($this->parseSchedule($value));
            }

            if ($key === 'professor') {
                $value = $this->parseProfessor($value);
            }

            // text() throws on an empty node list (e.g. no professor cell)
            $text = $value->count() > 0 ? $value->text() : '';

            return $this->normalizeWhitespace($text);
        };
    }

    /**
     * Collapse every run of whitespace into a single space
     *
     * Arrays are processed recursively and keep their keys.
     *
     * @param string|array $value
     * @return string|array
     */
    private function normalizeWhitespace($value)
    {
        if (is_array($value)) {
            return array_map([$this, 'normalizeWhitespace'], $value);
        }

        return preg_replace('/(\s)+/', ' ', $value);
    }

    /**
     * Put the schedule properties in an associative array
     *
     * Each row of the table nested in the schedule cell becomes an array
     * keyed by self::SCHEDULE_KEYS; 'time' is split on '-' into
     * self::TIME_KEYS and 'days' is split into single characters after
     * removing dots and whitespace. Rows without one cell per key are
     * skipped.
     *
     * @param Crawler $schedules The schedule cell of an entry
     * @return array
     */
    private function parseSchedule($schedules)
    {
        $selector = 'table > tr';
        $expected = count(self::SCHEDULE_KEYS);

        $rows = $schedules
            ->filter($selector)
            ->each(function (Crawler $row, $i) use ($expected) {
                $cells = $row
                    ->children()
                    ->slice(1, self::SCHEDULE_OFFSET)
                    ->each(function (Crawler $cell, $i) {
                        $value = $cell->text();
                        $key = self::SCHEDULE_KEYS[$i];

                        if ($key === 'days') {
                            $letters = preg_replace('/([\.\s])+/', '', $value);
                            // str_split('') yields [''] before PHP 8.2
                            $value = $letters === '' ? [] : str_split($letters);
                        } elseif ($key === 'time') {
                            // Always produce exactly two parts so that
                            // array_combine() cannot fail
                            $parts = array_pad(explode('-', $value, 2), 2, '');
                            $value = array_combine(self::TIME_KEYS, $parts);
                        }

                        return $value;
                    });

                if (count($cells) !== $expected) {
                    return null;
                }

                return array_combine(self::SCHEDULE_KEYS, $cells);
            });

        return array_values(array_filter($rows, 'is_array'));
    }

    /**
     * Gets only the professor name from the entry
     *
     * @param Crawler $professor The professor cell of an entry
     * @return Crawler The nodes matching 'td:last-child'; may be empty
     */
    private function parseProfessor($professor)
    {
        $selector = 'td:last-child';

        return $professor->filter($selector);
    }
}
// Instantiate the scraper, run it, and dump whatever it collected
$scraper = new Siiau();
$scraper->parse();
var_dump($scraper->entries);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment