Skip to content

Instantly share code, notes, and snippets.

@mhlipson
Created May 28, 2012 02:39
Show Gist options
  • Save mhlipson/2816922 to your computer and use it in GitHub Desktop.
Save mhlipson/2816922 to your computer and use it in GitHub Desktop.
Extract/save links from posts.. wordpress.
/*
Extracts links from post content and stores each one in post meta ('content_urls') as a delimiter-joined string.
RegEX help from: http://ask.amoeba.co.in/regular-expression-in-php-extract-link-text-href/
*/
add_action('save_post','extract_urls');

/**
 * Extracts anchor links from a post's content on save and stores them as post meta.
 *
 * Every link found becomes its own 'content_urls' meta row holding, joined with
 * '%%GG$$%%': the full anchor markup, the href value, the link text and the
 * value returned by getPageTitle() for that href. Existing 'content_urls' rows
 * are removed first, so the meta always mirrors the current content.
 *
 * Only anchors whose href is wrapped in double quotes are recognised.
 *
 * @param int $post_id ID of the post being saved.
 */
function extract_urls($post_id){
    // Revisions are skipped; only the post itself carries the link meta.
    if ( wp_is_post_revision( $post_id ) ) {
        return;
    }

    // Load the post that is actually being saved. The previous code passed an
    // undefined variable here, so the lookup did not depend on $post_id at all.
    $content_post = get_post($post_id);

    // Clear stale rows up front: this happened whether or not links were found.
    delete_post_meta($post_id,'content_urls');

    if ( empty($content_post) || !isset($content_post->post_content) ) {
        return;
    }

    // Lazy quantifiers and [^>]* keep each match inside a single <a>...</a>,
    // so several links on one line are captured separately. The "i" flag accepts
    // upper-case tags and "s" lets link text span line breaks.
    $pattern = '/<a\s+(?:[^>]*?\s)?href="([^"]*)"[^>]*>(.*?)<\/a>/is';
    $found = preg_match_all($pattern, stripslashes($content_post->post_content), $matches, PREG_SET_ORDER);

    if ( !$found || empty($matches) ) {
        return;
    }

    foreach($matches as $url){
        // Append the remote page title as the fourth field; implode() renders
        // a null title as an empty string.
        $url[] = getPageTitle($url[1]);
        add_post_meta($post_id,'content_urls',implode('%%GG$$%%', $url));
    }
}
/**
 * Reads back the 'content_urls' meta values of a post and splits each one
 * on the '%%GG$$%%' delimiter.
 *
 * @param int $post_id ID of the post whose stored links are wanted.
 * @return array|null One array of fields per stored meta value, or null when
 *                    there are no stored values.
 */
function lipson_return_urls($post_id){
    $rows = array();

    foreach (get_post_meta($post_id,'content_urls') as $stored) {
        $rows[] = explode('%%GG$$%%', $stored);
    }

    // Callers get null rather than an empty list when nothing is stored.
    return empty($rows) ? null : $rows;
}
/**
 * Fetches a remote page with cURL and returns the text of its <title> element.
 *
 * @param string $Url Address of the page to fetch.
 * @return string|null The title text with surrounding whitespace trimmed, or
 *                     null when the request fails, the response body is empty
 *                     or the page has no <title> element.
 */
function getPageTitle($Url){
    $ch = curl_init();
    if ($ch === false) {
        return null;
    }

    curl_setopt($ch,CURLOPT_URL,$Url);
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
    // Bound the request so an unresponsive host cannot stall the caller
    // (this function is invoked once per link while a post is being saved).
    curl_setopt($ch,CURLOPT_CONNECTTIMEOUT,5);
    curl_setopt($ch,CURLOPT_TIMEOUT,10);

    $data = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; treat that like an empty body.
    if (!is_string($data) || $data === '') {
        return null;
    }

    // Lazy match so only the first <title>...</title> pair is taken; "i" accepts
    // <TITLE>, "s" allows titles that wrap across lines, and [^>]* tolerates
    // attributes on the tag.
    if (!preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $data, $title)) {
        return null;
    }

    return trim($title[1]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment