Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
PHP script that scrapes a website, finds specific content, and exports it as an iCalendar (.ics) file for download and subscription. Uses the PHP Simple HTML DOM Parser by S.C. Chen (http://simplehtmldom.sourceforge.net/). A great tool for finding the right regular expressions: http://txt2re.com/
<?php
/**
 * Scrapes the KAVA screening list and streams it as an iCalendar (.ics) feed.
 *
 * Flow:
 *   1. Fetch the listing page with PHP Simple HTML DOM Parser.
 *   2. Walk every <td>: a cell containing a date starts a new day, a cell
 *      containing a time is one show on the most recently seen day.
 *   3. Pair the n-th time cell with the n-th link whose href contains
 *      "saatavuus" (the movie title).
 *   4. Emit one VEVENT per show, each assumed to last two hours.
 */
require_once 'simple_html_dom.php'; // http://simplehtmldom.sourceforge.net/

// The parentheses double as the PCRE delimiters, so $matches[0] is the whole match.
$paivays = '(\d+\.\d+\.\d+)'; // date, e.g. 24.12.2013
$aika = '(\d+:\d+)'; // time, e.g. 19:30

$html = file_get_html('http://www.kava.fi/verkkokauppa/naytosluettelo');
if (!is_object($html)) {
    // Fail loudly rather than serving a broken or empty calendar to subscribers.
    header('HTTP/1.1 502 Bad Gateway');
    header('Content-Type: text/plain; charset=utf-8');
    exit('Could not fetch the screening list.');
}

$nimet = $html->find('a[href*=saatavuus]'); // movie title links, in page order

// Collect shows as (date, time, title) records. Attaching the date to each show
// directly keeps days and shows aligned even when a date cell has no showtimes.
$naytokset = array();
$nykyinen_pvm = null; // most recently seen date string, null until the first date cell
$j = 0; // running index over all time cells; selects the matching title link
foreach ($html->find('td') as $element) {
    $teksti = $element->innertext;
    if (preg_match($paivays, $teksti, $matches)) {
        $nykyinen_pvm = $matches[0];
    }
    if (preg_match($aika, $teksti, $matches)) {
        // Keep only the matched HH:MM, not the whole cell, so surrounding
        // markup or whitespace cannot break date parsing later on.
        if ($nykyinen_pvm !== null && isset($nimet[$j])) {
            $naytokset[] = array(
                'pvm' => $nykyinen_pvm,
                'aika' => $matches[0],
                'nimi' => (string) $nimet[$j],
            );
        }
        // Always advance so later times stay paired with their own titles.
        ++$j;
    }
}

$aikavyohyke = new DateTimeZone('Europe/Helsinki');
$leimaus = gmdate('Ymd\THis\Z'); // DTSTAMP must be a UTC value without TZID

$rivit = array(
    'BEGIN:VCALENDAR',
    'VERSION:2.0',
    'PRODID:-//hacksw/handcal//NONSGML v1.0//EN',
    'X-WR-CALNAME: KAVA Elokuvat',
    'X-WR-TIMEZONE:Europe/Helsinki',
);

foreach ($naytokset as $naytos) {
    // "!" resets unparsed fields so the seconds are always 00.
    $alku = DateTime::createFromFormat('!d.m.Y H:i', $naytos['pvm'].' '.$naytos['aika'], $aikavyohyke);
    if ($alku === false) {
        continue; // skip cells that merely looked like a date or time
    }

    // Computing the end on a full date-time lets late shows roll over midnight
    // onto the next day instead of ending before they start.
    $loppu = clone $alku;
    $loppu->modify('+2 hours');

    // Title: strip markup, decode entities as UTF-8 and flatten to one line.
    $nimi = html_entity_decode(strip_tags($naytos['nimi']), ENT_QUOTES, 'UTF-8');
    $nimi = trim(preg_replace('/\s+/', ' ', $nimi));

    // Derive the UID from the content so it is unambiguous and stays the same
    // across refreshes of a subscribed calendar.
    $uid = md5($alku->format('Ymd\THis').'|'.$nimi).'@kava.fi';

    // Escape the characters that are special inside an iCalendar TEXT value.
    $yhteenveto = strtr($nimi, array('\\' => '\\\\', ';' => '\\;', ',' => '\\,'));

    $rivit[] = 'BEGIN:VEVENT';
    $rivit[] = 'UID:'.$uid;
    $rivit[] = 'DTSTAMP:'.$leimaus;
    $rivit[] = 'DTSTART;TZID=Europe/Helsinki:'.$alku->format('Ymd\THis');
    $rivit[] = 'DTEND;TZID=Europe/Helsinki:'.$loppu->format('Ymd\THis');
    $rivit[] = 'SUMMARY:'.$yhteenveto;
    $rivit[] = 'END:VEVENT';
}

$rivit[] = 'END:VCALENDAR';

header('Content-type: text/calendar; charset=utf-8');
header('Content-Disposition: attachment; filename=kava_elokuvat.ics');
// iCalendar content lines are terminated by CRLF.
echo implode("\r\n", $rivit)."\r\n";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.