Created
February 6, 2012 15:45
-
-
Save markrickert/1752759 to your computer and use it in GitHub Desktop.
Scraping nonstandard data from HTML using phpQuery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Scrape key/value records out of poorly formatted HTML using phpQuery.
//
// Inputs:  $document_html_string - the raw HTML to parse. It is not defined
//          in this snippet; it is assumed to be set by the including code,
//          and the phpQuery library is assumed to be loaded already.
// Output:  $my_data - map of cleaned table-header text => table-cell text,
//          plus a 'this_tf_value' entry holding the string 'true' or 'false'.

// Create new instance of phpQuery with the poorly formatted HTML string
phpQuery::newDocument($document_html_string);

$my_data = []; // Holds the scraped records as key => value pairs

foreach (pq('#main table tr') as $block) {
    // Scope the selector to the current row so we don't get _all_ the
    // page's TH elements. Trim first so stray whitespace around the header
    // text does not end up as leading/trailing underscores in the key.
    $key = trim(pq('th', $block)->text());

    // Clean the key: spaces become underscores, everything lower-cased.
    $key = strtolower(str_replace(' ', '_', $key));

    // Rows without any header text would all collapse onto the '' key and
    // overwrite one another, so skip them instead of storing junk.
    if ($key === '') {
        continue;
    }

    // Store the text of the row's TD field under the cleaned key.
    $my_data[$key] = pq('td', $block)->text();
}

// Check something outside the table element.
// In this case, the page uses incorrect markup inside a UL to indicate a
// true/false value: a UL whose trimmed text is exactly '--' means false,
// anything else means true. The flag is stored as a string, not a bool.
if (trim(pq('ul#this_list')->text()) !== '--') {
    $my_data['this_tf_value'] = 'true';
} else {
    $my_data['this_tf_value'] = 'false';
}

// Do stuff with the $my_data array.
// ...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment