Last active
August 29, 2015 13:55
-
-
Save jchristopher/8709539 to your computer and use it in GitHub Desktop.
When attributing Media search result weight to the post_parent in SearchWP, you may want to automatically append a contextual content snippet from each attached PDF to the original post excerpt. This filter can be used as a baseline for that customization: simply add it to your theme's functions.php and customize it however you want. Pay particular…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Append a contextual snippet from each attached PDF to a search result's excerpt.
 *
 * Intended as a `get_the_excerpt` filter callback. On search result pages it
 * picks the first usable search term (not a SearchWP common word and at least
 * `searchwp_minimum_word_length` characters long), looks for it in the
 * `searchwp_content` post meta of every PDF attached to the current post, and
 * appends a short window of words surrounding the first hit, labelled with the
 * PDF's title.
 *
 * The excerpt is returned untouched when this is not a search request, the post
 * is password protected, there is no current post, or no search term is usable.
 *
 * @param string $excerpt The excerpt for the current post.
 * @return string The excerpt, possibly followed by one snippet per matching PDF.
 */
function maybe_append_searchwp_pdf_excerpt( $excerpt ) {
    global $post;

    $pdf_excerpt_length = 15; // number of words in PDF excerpt

    // nothing to do without a current post, outside of search, or for protected posts
    if ( empty( $post->ID ) || ! is_search() || post_password_required() ) {
        return $excerpt;
    }

    // set up common words
    $common_words = array();
    if ( class_exists( 'SearchWP' ) ) {
        $searchwp     = SearchWP::instance();
        $common_words = (array) $searchwp->common;
    }

    // grab the terms
    $terms      = array_map( 'sanitize_text_field', explode( ' ', get_search_query() ) );
    $min_length = absint( apply_filters( 'searchwp_minimum_word_length', 3 ) );

    // find the first applicable search term (based on character length); this does
    // not depend on the PDF being scanned, so it is resolved once up front
    $needle = '';
    foreach ( $terms as $term ) {
        if ( '' !== $term && ! in_array( $term, $common_words, true ) && $min_length <= strlen( $term ) ) {
            $needle = $term;
            break;
        }
    }

    // without a usable term the pattern below would degrade to a bare word boundary
    // and match the first word of every PDF, so bail out instead
    if ( '' === $needle ) {
        return $excerpt;
    }

    // the term is user input: quote it so characters such as "/" or "(" cannot break the pattern
    $pattern = '/\b' . preg_quote( $needle, '/' ) . '\b(?!([^<]+)?>)/i';

    // our buffer is going to be 1/3 the total number of words in hopes of snagging one or two more
    // highlighted terms in the second and third thirds (-1 to accommodate the search term itself)
    $buffer = (int) floor( ( $pdf_excerpt_length - 1 ) / 3 );

    // backtrack and find all of the PDFs that are attached to this post,
    // since their weight has been attributed to this post
    $attached_pdfs = get_attached_media( 'application/pdf', $post->ID );

    foreach ( $attached_pdfs as $attached_pdf ) {
        // check to make sure there is file content to scan
        $pdf_content = get_post_meta( $attached_pdf->ID, 'searchwp_content', true );
        if ( empty( $pdf_content ) || ! is_string( $pdf_content ) ) {
            continue;
        }

        // our haystack is the PDF content
        $haystack    = explode( ' ', $pdf_content );
        $total_words = count( $haystack );
        $pdf_excerpt = '';

        // build our contextual excerpt around the first word that contains the term
        foreach ( $haystack as $haystack_key => $haystack_term ) {
            if ( 1 !== preg_match( $pattern, $haystack_term ) ) {
                continue;
            }

            // start one buffer before the match, or at the beginning if the match occurred too early
            $start = max( 0, $haystack_key - $buffer );

            // number of leading words we could not grab; they are made up for at the end
            $underflow = max( 0, $buffer - $haystack_key );

            // end two buffers after the match (array_slice clamps anything past the last word)
            $end = min( $total_words, $haystack_key + ( $buffer * 2 ) ) + $underflow;

            $pdf_excerpt = implode( ' ', array_slice( $haystack, $start, $end - $start ) );
            break;
        }

        // append our PDF-specific excerpt to the main excerpt
        if ( '' !== $pdf_excerpt ) {
            $pdf_label = get_the_title( $attached_pdf->ID ); // the PDF label will be the title of the PDF post

            // both values are plain text headed for HTML output, so escape them
            $excerpt .= '<br /><br /><strong>' . esc_html( $pdf_label ) . '</strong>: ' . esc_html( $pdf_excerpt );
        }
    }

    return $excerpt;
}
// Register the callback on the 'get_the_excerpt' filter.
add_filter( 'get_the_excerpt', 'maybe_append_searchwp_pdf_excerpt' );
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment