Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Fetch context surrounding a link
<?php
/**
 * Returns the text surrounding a (back)link. Very heavily inspired by
 * WordPress core.
 *
 * The page source is split into "paragraphs" at block-level tags, everything
 * except `<a>` tags is stripped, and the first paragraph that contains a link
 * whose opening tag mentions the target URL is used. Up to roughly 100 bytes
 * of text on either side of the link text are kept, cut at whitespace.
 *
 * Relies on an `esc_html()` function being available at call time
 * (presumably WordPress's; it is not defined in this file).
 *
 * @link https://github.com/WordPress/WordPress/blob/1dcf3eef7a191bd0a6cd21d4382b8b5c5a25c886/wp-includes/class-wp-xmlrpc-server.php#L6929
 *
 * @param string $html   The remote page's source.
 * @param string $target The target URL.
 *
 * @return string The escaped excerpt wrapped in `[&#8230;] ... [&#8230;]`, or
 *                an empty string if the target isn't found (or either
 *                argument is empty).
 */
function my_fetch_context( $html, $target ) {
	$html   = (string) $html;
	$target = (string) $target;

	// An empty needle matches everywhere in `strpos()` on PHP 8+, which would
	// make the first link on the page look like a link to the target.
	if ( '' === $html || '' === $target ) {
		return '';
	}

	// Work around bug in `strip_tags()`.
	$html = str_replace( '<!DOC', '<DOC', $html );

	$passes = array(
		// Collapse whitespace so that only block boundaries produce "\n\n".
		array( '/[\r\n\t ]+/', ' ' ),
		// Turn block-level tags into paragraph separators.
		array( '/<\/*(h1|h2|h3|h4|h5|h6|p|th|td|li|dt|dd|pre|caption|input|textarea|button|body)[^>]*>/', "\n\n" ),
		// Remove all script and style tags, including their content.
		array( '@<(script|style)[^>]*?>.*?</\\1>@si', '' ),
	);

	foreach ( $passes as $pass ) {
		$html = preg_replace( $pass[0], $pass[1], $html );

		if ( null === $html ) {
			// PCRE gave up (e.g. backtrack limit); nothing usable is left.
			return '';
		}
	}

	// Just keep the tag we need.
	$html = strip_tags( $html, '<a>' );

	$link_regex = '|<a[^>]+?' . preg_quote( $target, '|' ) . '[^>]*>([^>]+?)</a>|';

	foreach ( explode( "\n\n", $html ) as $para ) {
		if ( false === strpos( $para, $target ) ) {
			continue;
		}

		if ( 1 !== preg_match( $link_regex, $para, $context ) ) {
			// The URL isn't in a link context; keep looking.
			continue;
		}

		// Prevent really long link text. Cut on a character boundary so a
		// multi-byte UTF-8 sequence is never split in half; only fall back to
		// a byte cut when the text isn't valid UTF-8 to begin with.
		$link_text = $context[1];
		$overlong  = preg_match( '/^.{100}(?=.)/us', $link_text, $head );

		if ( 1 === $overlong ) {
			$link_text = $head[0] . '&#8230;';
		} elseif ( false === $overlong && strlen( $link_text ) > 100 ) {
			$link_text = substr( $link_text, 0, 100 ) . '&#8230;';
		}

		// We use this fake tag to mark the context. The marker is needed in
		// case the link text appears more than once in the paragraph.
		$marker = '<wpcontext>' . $link_text . '</wpcontext>';

		$excerpt = (string) preg_replace( '|\</?wpcontext\>|', '', $para );
		$excerpt = str_replace( $context[0], $marker, $excerpt ); // Swap out the link for our marker.
		$excerpt = strip_tags( $excerpt, '<wpcontext>' ); // Strip all tags but our context marker.
		$excerpt = trim( $excerpt );

		// Keep up to 100 bytes on either side of the marker, cut at whitespace.
		// The pattern needs whitespace on both sides, so when the link opens or
		// closes the paragraph it is retried against a space-padded copy;
		// otherwise the whole paragraph would be returned untrimmed.
		$window = '|.*?\s(.{0,100}' . preg_quote( $marker, '|' ) . '.{0,100})\s.*|s';

		if (
			1 === preg_match( $window, $excerpt, $hit )
			|| 1 === preg_match( $window, ' ' . $excerpt . ' ', $hit )
		) {
			$excerpt = $hit[1];
		}

		// Strip again, to remove the marker wrapper.
		return '[&#8230;] ' . esc_html( strip_tags( $excerpt ) ) . ' [&#8230;]';
	}

	// Link to target not found.
	return '';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.