Last active
January 3, 2016 23:28
-
-
Save arm5077/8534872 to your computer and use it in GitHub Desktop.
Segment of a PHP script that scrapes Pittsburgh Mayor Bill Peduto's daily schedule.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Parses the schedule markup in $schedule (set earlier in the script) into
// one entry per event and appends each entry to $export["entries"].
//
// Every entry carries: "title", "location", "start" and "end" (formatted as
// H:i:s, or "" when missing/unparseable) and, when a time line was found,
// "rawstart"/"rawend" holding the unformatted text for debugging.
//
// Relies on decode_entities(), which is defined outside this segment.

// Each event starts with a bold paragraph, so split on that opening markup.
$scheduleArray = explode( "<p><strong>", $schedule );

foreach( $scheduleArray as $j => $rawEvent ) {
    // Strip the remaining tags and any non-breaking spaces, then trim.
    // NOTE(review): the search string shows up as a blank in this copy of the
    // script; it is assumed to be a non-breaking space, so both the entity and
    // the UTF-8 character are removed. Ordinary spaces are kept because the
    // "Time: ", " - " and "Location: " matching below depends on them.
    $eventText = strip_tags( $rawEvent );
    $eventText = trim( str_replace( array( "&nbsp;", "\xC2\xA0" ), "", $eventText ) );
    $scheduleArray[$j] = $eventText;

    // Not read anywhere in this segment; kept in case later code relies on it.
    $start = 0;

    // Chunks that are empty after cleaning (e.g. text before the first event) are skipped.
    if( $eventText === "" ) {
        continue;
    }

    // This is where we store each event until it goes into the export array.
    $tempArray = array();
    $lineArray = explode( chr( 10 ), $eventText );

    // The title of the event is always the first line.
    $tempArray["title"] = decode_entities( $lineArray[0] );

    foreach( array_slice( $lineArray, 1 ) as $line ) {
        // Strict comparisons so a match at offset 0 still counts as a match.
        $isTimeLine = stripos( $line, "a.m." ) !== FALSE
            || stripos( $line, "p.m." ) !== FALSE
            || strpos( $line, "Time" ) !== FALSE;

        if( $isTimeLine ) {
            // Remove the label, then split into the parts before and after " - ".
            $time = str_replace( array( "Time: ", "Time " ), "", $line );
            $time = explode( " - ", $time );

            $rawStart = $time[0];
            // A line without " - " has no end part; avoid reading an undefined offset.
            $rawEnd = isset( $time[1] ) ? $time[1] : null;

            // Include unformatted time for debugging purposes.
            $tempArray["rawstart"] = $rawStart;
            $tempArray["rawend"] = $rawEnd;

            // Only store a formatted time when strtotime() could parse the
            // text; feeding its FALSE result to date() would produce a bogus time.
            $startStamp = strtotime( $rawStart );
            if( $startStamp !== FALSE ) {
                $tempArray["start"] = date( "H:i:s", $startStamp );
            }

            if( $rawEnd !== null && $rawEnd !== "" ) {
                $endStamp = strtotime( $rawEnd );
                if( $endStamp !== FALSE ) {
                    $tempArray["end"] = date( "H:i:s", $endStamp );
                }
            }
        }

        // A line that begins with "location" (any case) supplies the location.
        if( stripos( $line, "location" ) === 0 ) {
            $tempArray["location"] = str_replace( "Location: ", "", $line );
        }
    }

    // Fall back to defaults for anything the event text did not provide.
    if( !isset( $tempArray["location"] ) ) {
        $tempArray["location"] = "414 Grant St.";
    }
    if( !isset( $tempArray["start"] ) ) {
        $tempArray["start"] = "";
    }
    if( !isset( $tempArray["end"] ) ) {
        $tempArray["end"] = "";
    }

    $export["entries"][] = $tempArray;
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment