Fetch context surrounding a link
<?php | |
/**
 * Returns the text surrounding a (back)link. Very heavily inspired by
 * WordPress core.
 *
 * The page source is reduced to plain-text "paragraphs" (split on block-level
 * tags). The first paragraph in which `$target` occurs inside an `<a>` tag is
 * then cut down to the link text plus up to 100 bytes of whole words on
 * either side of it.
 *
 * @link https://github.com/WordPress/WordPress/blob/1dcf3eef7a191bd0a6cd21d4382b8b5c5a25c886/wp-includes/class-wp-xmlrpc-server.php#L6929
 *
 * @param string $html   The remote page's source.
 * @param string $target The target URL.
 *
 * @return string The excerpt, passed through `esc_html()` and wrapped in
 *                `[…]` markers, or an empty string if no link to the target
 *                is found (or the input is unusable).
 */
function my_fetch_context( $html, $target ) {
	// An empty target would "match" every paragraph, and therefore the first
	// link on the page, so bail out early on unusable input.
	if ( ! is_string( $html ) || ! is_string( $target ) || '' === $target ) {
		return '';
	}

	// Work around bug in `strip_tags()`. Case-insensitive, because doctypes
	// are frequently written in lowercase.
	$html = str_ireplace( '<!DOC', '<DOC', $html );

	$cleanup = array(
		// Collapse runs of whitespace into a single space.
		'/[\r\n\t ]+/' => ' ',
		// Turn block-level tags (in any letter case) into paragraph breaks.
		'/<\/*(h1|h2|h3|h4|h5|h6|p|th|td|li|dt|dd|pre|caption|input|textarea|button|body)[^>]*>/i' => "\n\n",
		// Remove all script and style tags, including their content.
		'@<(script|style)[^>]*?>.*?</\\1>@si' => '',
	);

	foreach ( $cleanup as $pattern => $replacement ) {
		$html = preg_replace( $pattern, $replacement, $html );

		if ( null === $html ) {
			// PCRE error (e.g., backtrack limit hit on a huge page).
			return '';
		}
	}

	// Just keep the tag we need.
	$html = strip_tags( $html, '<a>' );

	$preg_target = preg_quote( $target, '|' );

	foreach ( explode( "\n\n", $html ) as $para ) {
		if ( false === strpos( $para, $target ) ) {
			continue;
		}

		// The tag name is matched case-insensitively; the URL itself is not.
		if ( ! preg_match( '|<(?i:a)[^>]+?' . $preg_target . '[^>]*>([^>]+?)</(?i:a)>|', $para, $context ) ) {
			// The URL isn't in a link context; keep looking.
			continue;
		}

		$link_text = $context[1];

		// Prevent really long link text. Cut on a character boundary so we
		// never emit half of a multi-byte UTF-8 sequence.
		if ( strlen( $link_text ) > 100 ) {
			$over = preg_match( '/^(.{100})./us', $link_text, $head );

			if ( 1 === $over ) {
				$link_text = $head[1] . '…';
			} elseif ( false === $over ) {
				// Not valid UTF-8; fall back to a byte-wise cut.
				$link_text = substr( $link_text, 0, 100 ) . '…';
			}
			// `0 === $over`: more than 100 bytes but at most 100 characters;
			// leave the text as is.
		}

		// We use this fake tag to mark the link's position. The marker is
		// needed in case the link text appears more than once in the
		// paragraph. Any stray marker tags are removed first.
		$marker  = '<wpcontext>' . $link_text . '</wpcontext>';
		$excerpt = preg_replace( '|\</?wpcontext\>|', '', $para );
		$excerpt = str_replace( $context[0], $marker, $excerpt ); // Swap out the link for our marker.
		$excerpt = strip_tags( $excerpt, '<wpcontext>' );         // Strip all tags but our context marker.

		// Pad with a space on both sides: the pattern below requires
		// whitespace before and after the window, which a link at the very
		// start or end of the paragraph would otherwise lack.
		$excerpt = ' ' . trim( $excerpt ) . ' ';

		$windowed = preg_replace(
			'|.*?\s(.{0,100}' . preg_quote( $marker, '|' ) . '.{0,100})\s.*|s',
			'$1',
			$excerpt
		);

		if ( is_string( $windowed ) ) {
			$excerpt = $windowed;
		}

		// Again, to remove the marker wrapper (and any leftover padding).
		$excerpt = trim( strip_tags( $excerpt ) );

		return '[…] ' . esc_html( $excerpt ) . ' […]';
	}

	// Link to target not found.
	return '';
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment