Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@arm5077
Last active January 3, 2016 23:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arm5077/8534872 to your computer and use it in GitHub Desktop.
Save arm5077/8534872 to your computer and use it in GitHub Desktop.
Segment of PHP script that scrapes Pittsburgh Mayor Bill Peduto's daily schedule.
<?php
/*
 * Splits the scraped schedule markup in $schedule into individual events and
 * appends one record per event to $export["entries"].
 *
 * Each record has the keys:
 *   title    - first line of the event, passed through decode_entities()
 *   rawstart - unformatted start-time text (only set when a time line is found)
 *   rawend   - unformatted end-time text, or null when the time line has no end
 *   start    - start time as "H:i:s", or "" when absent or unparseable
 *   end      - end time as "H:i:s", or "" when absent or unparseable
 *   location - text after the "Location: " label, or "414 Grant St." by default
 *
 * NOTE(review): $schedule, $export and decode_entities() are not defined in
 * this segment; they are assumed to be provided by the surrounding script.
 */
$scheduleArray = explode( "<p><strong>", $schedule );

foreach ( $scheduleArray as $j => $rawEvent ) {
    // Reduce the chunk to plain text; the cleaned value is written back so
    // $scheduleArray holds the same contents after the loop as before this rewrite.
    $scheduleArray[$j] = trim( str_replace( "&nbsp;", "", strip_tags( $rawEvent ) ) );

    // Chunks that contained only markup/whitespace are not events.
    if ( $scheduleArray[$j] === "" ) {
        continue;
    }

    // This is where each event is assembled before it goes into the export array.
    $tempArray = array();

    // Split on LF and drop a trailing CR so CRLF input does not leak "\r" into values.
    $lineArray = explode( chr( 10 ), $scheduleArray[$j] );
    foreach ( $lineArray as $k => $line ) {
        $lineArray[$k] = rtrim( $line, "\r" );
    }

    // The title of the event is always the first line.
    $tempArray["title"] = decode_entities( $lineArray[0] );

    for ( $k = 1; $k < count( $lineArray ); $k++ ) {
        $line = $lineArray[$k];

        // Test for a time line. Every check compares against false with !==
        // because a match at offset 0 is a valid hit that a loose test would miss.
        $isTimeLine = stripos( $line, "a.m." ) !== false
            || stripos( $line, "p.m." ) !== false
            || strpos( $line, "Time" ) !== false;

        if ( $isTimeLine ) {
            // Drop the label, then split into start and (optional) end on " - ".
            $time = str_replace( "Time: ", "", $line );
            $time = str_replace( "Time ", "", $time );
            $time = explode( " - ", $time );

            // A time line without " - " has no second element; treat that as "no end".
            $hasEnd = isset( $time[1] );

            // Include unformatted times for debugging purposes.
            $tempArray["rawstart"] = $time[0];
            $tempArray["rawend"] = $hasEnd ? $time[1] : null;

            // Only record a formatted time when strtotime() understood the text;
            // otherwise date() would format the epoch as if it were a real time.
            $startStamp = strtotime( $time[0] );
            if ( $startStamp !== false ) {
                $tempArray["start"] = date( "H:i:s", $startStamp );
            }

            if ( $hasEnd && $time[1] !== "" ) {
                $endStamp = strtotime( $time[1] );
                if ( $endStamp !== false ) {
                    $tempArray["end"] = date( "H:i:s", $endStamp );
                }
            }
        }

        // A location line starts with "location" in any letter case; strip the
        // leading label with the same case-insensitivity used to detect it.
        if ( stripos( $line, "location" ) === 0 ) {
            $tempArray["location"] = preg_replace( '/^Location: /i', "", $line );
        }
    }

    // Fill in defaults for anything the event text did not provide.
    if ( !isset( $tempArray["location"] ) ) {
        $tempArray["location"] = "414 Grant St.";
    }
    if ( !isset( $tempArray["start"] ) ) {
        $tempArray["start"] = "";
    }
    if ( !isset( $tempArray["end"] ) ) {
        $tempArray["end"] = "";
    }

    $export["entries"][] = $tempArray;
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment