Skip to content

Instantly share code, notes, and snippets.

@jchristopher
Last active August 30, 2019 18:45
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jchristopher/fc1d461bd82b40e31290 to your computer and use it in GitHub Desktop.
Save jchristopher/fc1d461bd82b40e31290 to your computer and use it in GitHub Desktop.
Tell SearchWP to extract the contents of a PDF uploaded to an ACF File field and store that content as custom metadata on the post itself, rather than on the Media entry
<?php
/**
* Extract PDF content from a file uploaded via ACF File field (where
* the actual data stored is only the Media ID) and store PDF
* content as SearchWP 'extra' metadata so it can be included in
* searches and attributed to the post itself, not the Media entry
*/
class My_SearchWP_Customizations {

	// ACF field name for PDF upload field
	private $my_acf_field_name = 'acf_pdf_field_name';

	// custom field key used by SearchWP to store PDF content with the post
	private $my_searchwp_acf_pdf_custom_field_key = 'searchwp_pdf_content';

	/**
	 * Initializer; registers the two SearchWP filter callbacks defined on this class.
	 *
	 * @return void
	 */
	public function init() {
		// supply the extracted PDF text as 'extra' metadata while a post is being indexed
		add_filter( 'searchwp_extra_metadata', array( $this, 'maybe_extract_pdf_contents_from_acf_field' ), 10, 2 );
		// expose the extra metadata key alongside the other Custom Field keys
		add_filter( 'searchwp_custom_field_keys', array( $this, 'add_custom_pdf_content_metakey' ), 10, 1 );
	}

	/**
	 * Callback from the SearchWP indexer to extract content from a PDF uploaded to the specified
	 * ACF upload field and attach that content to the post being indexed.
	 *
	 * The metadata is returned untouched when no PDF parser class is available, ACF is not
	 * loaded, the field is empty for this post, or the referenced attachment is missing or
	 * is not a PDF.
	 *
	 * @param array  $post_metadata The post metadata to index
	 * @param object $post_to_index The post object currently being indexed
	 *
	 * @return array The post metadata to index
	 */
	public function maybe_extract_pdf_contents_from_acf_field( $post_metadata, $post_to_index ) {
		// at the very least we need either the SearchWP indexer, or SearchWP Xpdf Integration
		if ( ! class_exists( 'SearchWPIndexer' ) && ! class_exists( 'SearchWPXpdf' ) ) {
			return $post_metadata;
		}

		// without ACF or a usable post there is nothing to look up
		if ( ! function_exists( 'get_field' ) || ! is_object( $post_to_index ) || empty( $post_to_index->ID ) ) {
			return $post_metadata;
		}

		// Read the field from the post being indexed (not whatever the global post happens to be),
		// and ask for the raw, unformatted value so we get the attachment ID regardless of the
		// field's "Return Format" setting.
		$pdf_object_id = get_field( $this->my_acf_field_name, $post_to_index->ID, false );

		// defensive: tolerate an array-shaped value that carries the attachment ID
		if ( is_array( $pdf_object_id ) ) {
			$pdf_object_id = isset( $pdf_object_id['ID'] ) ? $pdf_object_id['ID'] : 0;
		}

		// if there's no ACF value, there's no sense in continuing
		if ( empty( $pdf_object_id ) ) {
			return $post_metadata;
		}

		// A zero ID must not reach get_post(), which falls back to the global post
		// when handed an empty value.
		$pdf_object_id = absint( $pdf_object_id );
		if ( 0 === $pdf_object_id ) {
			return $post_metadata;
		}

		// grab the Media post object that is the PDF
		$pdf_object = get_post( $pdf_object_id );

		// bail unless the attachment exists and really is a PDF
		if ( ! is_object( $pdf_object ) || 'application/pdf' !== $pdf_object->post_mime_type ) {
			return $post_metadata;
		}

		// if Xpdf Integration is enabled, we can use Xpdf here
		if ( class_exists( 'SearchWPXpdf' ) ) {
			// use Xpdf to parse the PDF
			$xpdf = new SearchWPXpdf();
			$xpdf->init();
			$pdf_content = $xpdf->extract_pdf_content( '', get_attached_file( $pdf_object->ID ) );
		} else {
			// fall back to built-in PDF parsing and grab the PDF content
			$indexer = new SearchWPIndexer();
			$indexer->set_post( $pdf_object );
			$pdf_content = $indexer->extract_pdf_text( $pdf_object->ID );
		}

		// add it to the pseudo-metadata array
		$post_metadata[ $this->my_searchwp_acf_pdf_custom_field_key ] = $pdf_content;

		return $post_metadata;
	}

	/**
	 * Make sure our 'extra' metadata key is visible in the Custom Fields dropdown on the settings page
	 *
	 * @param array $keys The Custom Field keys to show on the settings screen
	 *
	 * @return array The Custom Field keys to show on the settings screen
	 */
	public function add_custom_pdf_content_metakey( $keys ) {
		$keys[] = $this->my_searchwp_acf_pdf_custom_field_key;

		return $keys;
	}
}
// Instantiate the customizations and register their SearchWP hooks in one step;
// the instance remains available afterwards as $my_searchwp_customizations.
( $my_searchwp_customizations = new My_SearchWP_Customizations() )->init();
@cfloresmodern
Copy link

cfloresmodern commented Aug 30, 2019

Unfortunately not working for me. Added it in functions.php and replaced the File field name with the field name that I am using for my File field. Also changed the output of the ACF file field to the ID instead of the default option that is Object. Am I doing anything wrong?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment