Skip to content

Instantly share code, notes, and snippets.

@jchristopher
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jchristopher/fba421486843f07f4c6d to your computer and use it in GitHub Desktop.
Save jchristopher/fba421486843f07f4c6d to your computer and use it in GitHub Desktop.
Integrate SearchWP with WP Document Revisions by indexing Revision content via SearchWP's Xpdf Integration extension (required) as a Custom Field named 'wp_document_revision'
<?php
// BEGIN WP Document Revisions/SearchWP integration (REQUIRES XPDF INTEGRATION EXTENSION)
// NOTE: after adding this snippet to functions.php you will need to customize the
// Document weight on SearchWP's settings screen: add a Custom Field named
// 'wp_document_revision' and assign it a weight of your choosing. That Custom Field
// is where the extracted document content is stored (as Custom Field content).
/**
 * Retrieves all revisions for a given post, with the current post prepended at index 0.
 *
 * This was taken directly from WPDR's main class because we can't instantiate a new object.
 * Workaround for #16215 to ensure the revision author is accurate: revision authors are
 * shifted by one, so each entry takes the author of the entry that follows it and the
 * last entry takes the current post's author.
 * http://core.trac.wordpress.org/ticket/16215
 *
 * The result is stored in (and served from) the 'document_revisions' object-cache group.
 *
 * @since 1.0
 * @param int $postID the post ID
 * @return array|false array of post objects, or false when the post cannot be loaded
 */
function mywpdr_get_revisions( $postID ) {
    // get the actual post
    $post = get_post( $postID );
    if ( ! $post ) {
        return false;
    }

    // serve a previously built list when one is cached
    if ( $cache = wp_cache_get( $postID, 'document_revisions' ) ) {
        return $cache;
    }

    // correct the modified date
    $post->post_date = date( 'Y-m-d H:i:s', (int) get_post_modified_time( 'U', null, $postID ) );

    // remember the post author before any authors are shifted
    $post_author = $post->post_author;

    // fix for quotes in the most recent post because it comes from get_post
    $post->post_excerpt = html_entity_decode( $post->post_excerpt );

    // get revisions, and prepend the post (array_unshift renumbers the numeric keys from 0)
    $revs = wp_get_post_revisions( $postID, array( 'order' => 'DESC' ) );
    array_unshift( $revs, $post );

    // Iterate by value: the entries are objects, so property writes still reach the
    // originals, and no dangling reference is left in the array we cache and return.
    $last_index = count( $revs ) - 1;
    foreach ( $revs as $ID => $rev ) {
        if ( $ID < $last_index ) {
            // anything other than the last entry takes the following entry's author
            $rev->post_author = $revs[ $ID + 1 ]->post_author;
        } else {
            // the last entry takes the post author
            $rev->post_author = $post_author;
        }
    }

    wp_cache_set( $postID, $revs, 'document_revisions' );

    return $revs;
}
/**
 * Given a post (object or ID), returns all uploads attached to the owning document.
 *
 * This was taken directly from WPDR's main class because we can't instantiate a new object.
 * A revision is resolved to its parent post, and an attachment is resolved to the post it
 * is attached to, before the children are queried.
 *
 * @since 0.5
 * @param object|int|string $post (optional) post object or ID; defaults to the global $post
 * @return array attached uploads as returned by get_children(), newest first by post_date;
 *               an empty array when no post can be resolved
 */
function mywpdr_get_attachments( $post = '' ) {
    if ( $post == '' ) {
        // Read the global by value. Binding it with `global $post` would let the
        // reassignments below overwrite the global for the rest of the request.
        $post = isset( $GLOBALS['post'] ) ? $GLOBALS['post'] : null;
    }

    // verify that it's an object
    if ( ! is_object( $post ) ) {
        $post = get_post( $post );
    }
    if ( ! is_object( $post ) ) {
        return array();
    }

    // check for revisions
    if ( $parent = wp_is_post_revision( $post ) ) {
        $post = get_post( $parent );
    }

    // check for attachments
    if ( is_object( $post ) && $post->post_type == 'attachment' ) {
        $post = get_post( $post->post_parent );
    }

    // either lookup above may have failed
    if ( ! is_object( $post ) || ! isset( $post->ID ) ) {
        return array();
    }

    $args = array(
        'post_parent' => $post->ID,
        'post_status' => 'inherit',
        'post_type'   => 'attachment',
        'order'       => 'DESC',
        'orderby'     => 'post_date',
    );

    // let other code adjust the query
    $args = apply_filters( 'document_revision_query', $args );

    return get_children( $args );
}
/**
 * Given a post object or post ID, returns the most recent entry of its revision list.
 *
 * This was taken directly from WPDR's main class because we can't instantiate a new object.
 * The entry's post_content is expected to hold an upload ID; when it is not numeric, it is
 * overwritten with the ID of the first attachment returned by mywpdr_get_attachments().
 *
 * @param object|int $post post object or post ID
 * @return object|false latest revision object, or false when there are no revisions or
 *                      no attachment to fall back on
 */
function mywpdr_get_latest_revision( $post ) {
    // normalise to a post ID
    $post = is_object( $post ) ? $post->ID : $post;

    $revisions = mywpdr_get_revisions( $post );
    if ( ! $revisions ) {
        return false;
    }

    $latest = $revisions[0];

    // an upload ID is already stored in the content field: nothing to repair
    if ( is_numeric( $latest->post_content ) ) {
        return $latest;
    }

    // no upload ID for some reason: default to the latest attached upload
    $attachments = mywpdr_get_attachments( $post );
    if ( empty( $attachments ) ) {
        return false;
    }

    $newest_upload        = reset( $attachments );
    $latest->post_content = $newest_upload->ID;

    return $latest;
}
/**
 * Tap in to the SearchWP indexing process and check if a WP Document Revisions post is being
 * indexed. If it is, retrieve the most recent revision and extract the PDF content of its
 * file via the Xpdf Integration extension. The text is stored as a pseudo Custom Field called
 * wp_document_revision, allowing SearchWP to search for that content.
 *
 * This is a filter callback, so the incoming metadata is returned untouched whenever the
 * content cannot be extracted (wrong post type, Xpdf unavailable, no revision, no file).
 *
 * @param array  $post_metadata extra metadata collected so far for the post being indexed
 * @param object $post_to_index the post being indexed
 *
 * @return array the metadata, with 'wp_document_revision' added when content was extracted
 */
function my_index_wp_document_revisions( $post_metadata, $post_to_index ) {
    // make sure it's a WP Document Revisions Document
    if ( 'document' !== $post_to_index->post_type ) {
        return $post_metadata;
    }

    // make sure Xpdf Integration is installed and activated
    include_once( ABSPATH . 'wp-admin/includes/plugin.php' );
    if ( ! is_plugin_active( 'searchwp-xpdf/searchwp-xpdf.php' ) || ! class_exists( 'SearchWPXpdf' ) ) {
        // returning false here would wipe out metadata added by other callbacks
        return $post_metadata;
    }

    // get the latest Revision; its post_content holds the upload (attachment) ID
    $latest_revision = mywpdr_get_latest_revision( $post_to_index );
    if ( ! is_object( $latest_revision ) || ! is_numeric( $latest_revision->post_content ) ) {
        // passing false on to get_post() would silently resolve to the global post
        return $post_metadata;
    }

    $attachment = get_post( (int) $latest_revision->post_content );
    if ( ! is_object( $attachment ) ) {
        return $post_metadata;
    }

    $filename = get_attached_file( $attachment->ID );
    if ( ! $filename ) {
        return $post_metadata;
    }

    // grab the PDF content from Xpdf
    $xpdf        = new SearchWPXpdf();
    $pdf_content = $xpdf->extractPdfContent( '', $filename );

    // add it to the pseudo-metadata array
    $post_metadata['wp_document_revision'] = $pdf_content;

    return $post_metadata;
}
// Register the callback on the 'searchwp_extra_metadata' filter at priority 10, accepting
// 2 arguments (the metadata array and the post being indexed).
add_filter( 'searchwp_extra_metadata', 'my_index_wp_document_revisions', 10, 2 );
// END WP Document Revisions/SearchWP integration
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment