Skip to content

Instantly share code, notes, and snippets.

@eshelman
Created July 22, 2013 01:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save eshelman/6050765 to your computer and use it in GitHub Desktop.
Save eshelman/6050765 to your computer and use it in GitHub Desktop.
Patches for the WordPress "Broken Link Checker" plugin (http://wordpress.org/support/plugin/broken-link-checker) that add full HTML link search for Custom Fields. To apply these patches, first update Broken Link Checker to version 1.8.2, then apply them inside your WordPress plugins directory (wp-content/plugins/broken-link-checker/).
--- ~/broken-link-checker/includes/any-post.php 2013-07-16 17:31:29.000000000 -0400
+++ broken-link-checker/includes/any-post.php 2013-07-21 21:27:17.000000000 -0400
@@ -74,6 +74,7 @@
//Highlight and nofollow broken links in posts & pages
if ( $this->plugin_conf->options['mark_broken_links'] || $this->plugin_conf->options['nofollow_broken_links'] ){
add_filter( 'the_content', array(&$this, 'hook_the_content') );
+ add_filter( 'blc_mark_broken_links', array(&$this, 'hook_the_content') );
if ( $this->plugin_conf->options['mark_broken_links'] && !empty( $this->plugin_conf->options['broken_link_css'] ) ){
add_action( 'wp_head', array(&$this,'hook_wp_head') );
}
@@ -275,11 +276,12 @@
SELECT instances.raw_url
FROM {$wpdb->prefix}blc_instances AS instances JOIN {$wpdb->prefix}blc_links AS links
ON instances.link_id = links.link_id
- WHERE
- instances.container_type = %s
+ WHERE (
+ (instances.container_type = %s OR instances.container_type = 'custom_field')
AND instances.container_id = %d
AND links.broken = 1
- AND parser_type = 'link'
+ AND (parser_type = 'link' OR parser_type = 'metadata')
+ )
";
$q = $wpdb->prepare($q, $post->post_type, $post->ID);
$links = $wpdb->get_results($q, ARRAY_A);
--- ~/broken-link-checker/modules/containers/custom_field.php 2013-07-21 10:26:08.000000000 -0400
+++ broken-link-checker/modules/containers/custom_field.php 2013-07-21 21:30:40.000000000 -0400
@@ -91,7 +91,7 @@
}
/**
- * "Unlink"-ing a custom fields removes all metadata fields that contain the specified URL.
+ * Remove all links that have a certain URL, leaving anchor text intact.
*
* @param string $field_name
* @param blcParser $parser
@@ -100,22 +100,28 @@
* @return bool|WP_Error True on success, or an error object if something went wrong.
*/
function unlink($field_name, $parser, $url, $raw_url =''){
- $rez = delete_metadata($this->meta_type, $this->container_id, $field_name, $raw_url);
- if ( $rez ){
- return true;
- } else {
- return new WP_Error(
- 'metadata_delete_failed',
- sprintf(
- __("Failed to delete the meta field '%s' on %s [%d]", 'broken-link-checker'),
- $field_name,
- $this->meta_type,
- $this->container_id
- )
- );
+ //Get the current values of the field that needs to be edited.
+ $old_value = $this->get_field($field_name);
+
+ //Get the new field values
+ $unlink_result = $parser->unlink($old_value, $url, $raw_url);
+ if ( is_wp_error($unlink_result) ){
+ return $unlink_result;
+ }
+
+ //Update the field with the new value returned by the parser.
+ //Notice that WordPress allows multiple metadata values with the same name -
+ //each is differentiated by its contents.
+ foreach($unlink_result['changed'] as $value) {
+ $update_result = $this->update_field( $field_name, $value['content'], $value['old_content'] );
+ if ( is_wp_error($update_result) ){
+ return $update_result;
}
}
+ return true;
+ }
+
/**
* Change a meta field containing the specified URL to a new URL.
*
@@ -143,25 +149,23 @@
}
//Get the current values of the field that needs to be edited.
- //The default metadata parser ignores them, but we're still going
- //to set this argument to a valid value in case someone writes a
- //custom meta parser that needs it.
$old_value = $this->get_field($field_name);
- //Get the new field value (a string).
+ //Get the new field values
$edit_result = $parser->edit($old_value, $new_url, $old_url, $old_raw_url);
if ( is_wp_error($edit_result) ){
return $edit_result;
}
//Update the field with the new value returned by the parser.
- //Notice how $old_raw_url is used instead of $old_value. $old_raw_url contains the entire old
- //value of the metadata field (see blcMetadataParser::parse()) and thus can be used to
- //differentiate between multiple meta fields with identical names.
- $update_result = $this->update_field( $field_name, $edit_result['content'], $old_raw_url );
+ //Notice that WordPress allows multiple metadata values with the same name -
+ //each is differentiated by its contents.
+ foreach($edit_result['changed'] as $value) {
+ $update_result = $this->update_field( $field_name, $value['content'], $value['old_content'] );
if ( is_wp_error($update_result) ){
return $update_result;
}
+ }
//Return the new "raw" URL.
return $edit_result['raw_url'];
--- ~/broken-link-checker/modules/parsers/metadata.php 2013-07-21 21:14:00.406206156 -0400
+++ broken-link-checker/modules/parsers/metadata.php 2013-07-21 21:24:19.000000000 -0400
@@ -34,75 +34,201 @@
}
foreach($content as $value){
- //The complete contents of the meta field are stored in raw_url.
- //This is useful for editing/unlinking, when one may need to
- //distinguish between multiple fields with the same name.
- $raw_url = $value;
-
- //If this is a multiline metadata field take only the first line (workaround for the 'enclosure' field).
- //$url = trim( array_shift( explode("\n", $value) ) );
- $url = trim($value);
+ //remove all <code></code> blocks first
+ $value = preg_replace('/<code[^>]*>.+?<\/code>/si', ' ', $value);
+
+ //Find links
+ $params = array(
+ 'base_url' => $base_url,
+ 'default_link_text' => $default_link_text,
+ );
+ $instances = array_merge(
+ $this->map($value, array($this, 'parser_callback'), $params),
+ $instances
+ );
+ }
+
+ //The parser callback returns NULL when it finds an invalid link. Filter out those nulls
+ //from the list of instances.
+ $instances = array_filter($instances);
+
+ return $instances;
+ }
+
+ /**
+ * blcMetadataParser::parser_callback()
+ *
+ * @access private
+ *
+ * @param array $link
+ * @param array $params
+ * @return blcLinkInstance|null
+ */
+ function parser_callback($link, $params){
+ $base_url = $params['base_url'];
+
+ $url = $raw_url = $link['href'];
+ $url = trim($url);
+ //FB::log($url, "Found link");
+
+ //Sometimes links may contain shortcodes. Execute them.
+ $url = do_shortcode($url);
+
+ //Skip empty URLs
+ if ( empty($url) ){
+ return null;
+ };
//Attempt to parse the URL
$parts = @parse_url($url);
if(!$parts) {
- return $instances; //Ignore invalid URLs
+ return null; //Skip invalid URLs
};
if ( !isset($parts['scheme']) ){
//No sheme - likely a relative URL. Turn it into an absolute one.
- $url = $this->relative2absolute($url, $base_url);
+ $url = $this->relative2absolute($url, $base_url); //$base_url comes from $params
+ }
- //Skip invalid URLs (again)
+ //Skip invalid links (again)
if ( !$url || (strlen($url)<6) ) {
- return $instances;
- }
+ return null;
}
+ $text = strip_tags( $link['#link_text'] );
+
//The URL is okay, create and populate a new link instance.
$instance = new blcLinkInstance();
$instance->set_parser($this);
$instance->raw_url = $raw_url;
- $instance->link_text = $default_link_text;
+ $instance->link_text = $text;
$link_obj = new blcLink($url); //Creates or loads the link
$instance->set_link($link_obj);
- $instances[] = $instance;
- }
-
- return $instances;
+ return $instance;
}
/**
* Change the URL in a metadata field to another one.
*
* This is tricky because there can be multiple metadata fields with the same name
- * but different values. So we ignore $content (which might be an array of multiple
- * metadata values) and use the old raw_url that we stored when parsing the field(s)
- * instead.
+ * but different values. We go through all of them looking for links to the old URL.
+ * Only the field values that actually change are returned.
*
* @see blcMetadataParser::parse()
*
- * @param string $content Ignored.
- * @param string $new_url The new URL.
- * @param string $old_url Ignored.
- * @param string $old_raw_url The current meta value.
+ * @param string|array $content Look for links in this object.
+ * @param string $new_url Change the links to this URL.
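+ * @param string $old_url The normalized URL of the links to look for. Not used here; links are matched by $old_raw_url.
+ * @param string $old_raw_url The raw, not-normalized URL of the links to look for. Compared against each link's href.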
*
- * @return array|WP_Error
+ * @return array|WP_Error If successful, an associative array with two keys : 'changed' - a list of arrays, one per
+ * modified field value, each holding 'content' (the modified value) and 'old_content' (the original value), and
+ * 'raw_url' - the new raw, non-normalized URL used for the modified links. Here raw_url is always equal to $new_url.
*/
function edit($content, $new_url, $old_url, $old_raw_url){
- //For multiline fields (like 'enclosure') we only want to change the first line.
- $lines = explode("\n", $old_raw_url);
- array_shift($lines); //Discard the old first line
- array_unshift($lines, $new_url); //Insert the new URL in its place.
- $content = implode("\n", $lines);
+ //Save the old & new URLs for use in the edit callback.
+ $args = array(
+ 'old_url' => $old_raw_url,
+ 'new_url' => $new_url,
+ );
+
+ if ( !is_array($content) ){
+ $content = array($content);
+ }
+
+ $changed = array();
+ foreach($content as $value){
+ //Find all links and replace those whose href matches $old_raw_url.
+ $new_value = $this->multi_edit($value, array(&$this, 'edit_callback'), $args);
+ if($new_value != $value) {
+ $changed[] = array(
+ 'content' => $new_value,
+ 'old_content' => $value
+ );
+ }
+ }
+
+ return array('changed' => $changed, 'raw_url' => $new_url);
+ }
+
+ function edit_callback($link, $params){
+ if ($link['href'] == $params['old_url']){
return array(
- 'content' => $content,
- 'raw_url' => $new_url,
+ 'href' => $params['new_url'],
+ );
+ } else {
+ return $link['#raw'];
+ }
+ }
+
+ /**
+ * Remove all links that have a certain URL, leaving anchor text intact.
+ *
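+ * @param string|array $content Look for links in this field value (or array of field values).
+ * @param string $url The URL to look for.
+ * @param string $raw_url The raw, non-normalized version of the URL to look for. Optional.
+ * @return array Associative array with one key, 'changed' - a list of arrays holding 'content' (the value with matching links removed) and 'old_content' (the original value).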
+ */
+ function unlink($content, $url, $raw_url){
+ if ( empty($raw_url) ){
+ $raw_url = $url;
+ }
+
+ $args = array(
+ 'old_url' => $raw_url,
);
+
+ if ( !is_array($content) ){
+ $content = array($content);
+ }
+
+ $changed = array();
+
+ foreach($content as $value){
+ //Find all links and remove those that match $raw_url.
+ $new_value = $this->multi_edit($value, array(&$this, 'unlink_callback'), $args);
+ if($new_value != $value) {
+ $changed[] = array(
+ 'content' => $new_value,
+ 'old_content' => $value
+ );
+ }
+ }
+
+ return array('changed' => $changed);
+ }
+
+ /**
+ * blcMetadataParser::unlink_callback()
+ *
+ * @access private
+ *
+ * @param array $link
+ * @param array $params
+ * @return string
+ */
+ function unlink_callback($link, $params){
+ //Skip links that don't match the specified URL
+ if ($link['href'] != $params['old_url']){
+ return $link['#raw'];
+ }
+
+ $config = blc_get_configuration();
+ if ( $config->options['mark_removed_links'] ){
+ //Leave only the anchor text + the removed_link CSS class
+ return sprintf(
+ '<span class="removed_link" title="%s">%s</span>',
+ esc_attr($link['href']),
+ $link['#link_text']
+ );
+ } else {
+ //Just the anchor text
+ return $link['#link_text'];
+ }
}
/**
@@ -120,8 +246,9 @@
);
$field_html = sprintf(
- '<code>%s</code>',
- $instance->container_field
+ '<code>%s</code><br />%s',
+ $instance->container_field,
+ $instance->link_text
);
if ( $context != 'email' ){
@@ -130,4 +257,139 @@
return $field_html;
}
+
+ /**
+ * Apply a callback function to all HTML links found in a string and return the results.
+ *
+ * The link data array will contain at least these keys :
+ * 'href' - the URL of the link (with html_entity_decode() already applied).
+ * '#raw' - the raw link code, e.g. the entire '<a href="...">...</a>' tag of a HTML link.
+ * '#offset' - the offset within $content at which the first character of the link tag was found.
+ * '#link_text' - the link's anchor text, if any. May contain HTML tags.
+ *
+ * Any attributes of the link tag will also be included in the returned array as attr_name => attr_value
+ * pairs. This function will also automatically decode any HTML entities found in attribute values.
+ *
+ * @see blcParser::map()
+ *
+ * @param string $content A text string to parse for links.
+ * @param callback $callback Callback function to apply to all found links.
+ * @param mixed $extra If the optional $extra param. is supplied, it will be passed as the second parameter to the function $callback.
+ * @return array An array of all detected links after applying $callback to each of them.
+ */
+ function map($content, $callback, $extra = null){
+ $results = array();
+
+ //Find all links
+ $links = blcUtility::extract_tags($content, 'a', false, true);
+
+ //Iterate over the links and apply $callback to each
+ foreach($links as $link){
+
+ //Massage the found link into a form required for the callback function
+ $param = $link['attributes'];
+ $param = array_merge(
+ $param,
+ array(
+ '#raw' => $link['full_tag'],
+ '#offset' => $link['offset'],
+ '#link_text' => $link['contents'],
+ 'href' => isset($link['attributes']['href'])?$link['attributes']['href']:'',
+ )
+ );
+
+ //Prepare arguments for the callback
+ $params = array($param);
+ if ( isset($extra) ){
+ $params[] = $extra;
+ }
+
+ //Execute & store :)
+ $results[] = call_user_func_array($callback, $params);
+ }
+
+ return $results;
+ }
+
+ /**
+ * Modify all HTML links found in a string using a callback function.
+ *
+ * The callback function should return either an associative array or a string. If
+ * a string is returned, the parser will replace the current link with the contents
+ * of that string. If an array is returned, the current link will be modified/rebuilt
+ * by substituting the new values for the old ones.
+ *
+ * esc_attr() will be automatically applied to attribute values (but not to #link_text).
+ *
+ * @see blcParser::multi_edit()
+ *
+ * @param string $content A text string containing the links to edit.
+ * @param callback $callback Callback function used to modify the links.
+ * @param mixed $extra If supplied, $extra will be passed as the second parameter to the function $callback.
+ * @return string The modified input string.
+ */
+ function multi_edit($content, $callback, $extra = null){
+ //Just reuse map() + a little helper func. to apply the callback to all links and get modified links
+ $modified_links = $this->map($content, array(&$this, 'execute_edit_callback'), array($callback, $extra));
+
+ //Replace each old link with the modified one
+ $offset = 0;
+ foreach($modified_links as $link){
+ if ( isset($link['#new_raw']) ){
+ $new_html = $link['#new_raw'];
+ } else {
+ //Assemble the new link tag
+ $new_html = '<a';
+ foreach ( $link as $name => $value ){
+
+ //Skip special keys like '#raw' and '#offset'
+ if ( substr($name, 0, 1) == '#' ){
+ continue;
+ }
+
+ $new_html .= sprintf(' %s="%s"', $name, esc_attr( $value ));
+ }
+ $new_html .= '>' . $link['#link_text'] . '</a>';
+ }
+
+ $content = substr_replace($content, $new_html, $link['#offset'] + $offset, strlen($link['#raw']));
+ //Update the replacement offset
+ $offset += ( strlen($new_html) - strlen($link['#raw']) );
+ }
+
+ return $content;
+ }
+
+ /**
+ * Helper function for blcMetadataParser::multi_edit()
+ * Applies the specified callback function to each link and merges
+ * the result with the current link attributes. If the callback returns
+ * a replacement HTML tag instead, it will be stored in the '#new_raw'
+ * key of the return array.
+ *
+ * @access protected
+ *
+ * @param array $link
+ * @param array $info The callback function and the extra argument to pass to that function (if any).
+ * @return array
+ */
+ function execute_edit_callback($link, $info){
+ list($callback, $extra) = $info;
+
+ //Prepare arguments for the callback
+ $params = array($link);
+ if ( isset($extra) ){
+ $params[] = $extra;
+ }
+
+ $new_link = call_user_func_array($callback, $params);
+
+ if ( is_array($new_link) ){
+ $link = array_merge($link, $new_link);
+ } elseif (is_string($new_link)) {
+ $link['#new_raw'] = $new_link;
+ }
+
+ return $link;
+ }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment