Created
August 28, 2020 03:51
-
-
Save msaari/b23f044d633867bbcdd3f90cdf7c8b6e to your computer and use it in GitHub Desktop.
Relevanssi indexing.php without doc count update
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* /lib/indexing.php | |
* | |
* @package Relevanssi | |
* @author Mikko Saari | |
* @license https://wordpress.org/about/gpl/ GNU General Public License | |
* @see https://www.relevanssi.com/ | |
*/ | |
// Attach the callback relevanssi_index_get_post_type() (not defined in this part
// of the file) to the filter hook of the same name, at priority 1 and accepting
// 2 arguments.
add_filter( 'relevanssi_index_get_post_type', 'relevanssi_index_get_post_type', 1, 2 ); | |
/**
 * Counts every post that is eligible for indexing.
 *
 * Thin wrapper around relevanssi_indexing_post_counter() that asks for the
 * full count: posts are counted whether or not they are already in the index.
 *
 * @see relevanssi_indexing_post_counter()
 *
 * @return int The number of posts to index, as reported by
 * relevanssi_indexing_post_counter().
 */
function relevanssi_count_total_posts() {
	return relevanssi_indexing_post_counter( false );
}
/**
 * Counts the posts that are eligible for indexing but not yet indexed.
 *
 * Thin wrapper around relevanssi_indexing_post_counter() that asks it to
 * restrict the count to posts missing from the index.
 *
 * @see relevanssi_indexing_post_counter()
 *
 * @return int The number of posts still to index, as reported by
 * relevanssi_indexing_post_counter().
 */
function relevanssi_count_missing_posts() {
	return relevanssi_indexing_post_counter( true );
}
/**
 * Counts the posts matched by the indexing query.
 *
 * Builds the same query that is used to fetch the posts to index (post type
 * restriction and valid statuses included), rewrites it into a COUNT query and
 * runs it.
 *
 * @global object $wpdb The WordPress database interface.
 *
 * @param boolean $extend If true, count only posts missing from the index. If
 * false, count all posts. Default false.
 *
 * @return int The number of posts to index; 0 when the query returns nothing.
 */
function relevanssi_indexing_post_counter( $extend = false ) {
	global $wpdb;

	$restriction  = relevanssi_post_type_restriction();
	$valid_status = relevanssi_valid_status_array();
	$limit        = '';

	$query = relevanssi_generate_indexing_query( $valid_status, $extend, $restriction, $limit );
	// Reuse the indexing query, but count the rows instead of fetching the IDs.
	$query = str_replace( 'SELECT post.ID', 'SELECT COUNT(post.ID)', $query );

	/**
	 * Allows actions to happen before the indexing query is run.
	 *
	 * The indexing query fetches a list of posts to index (either all posts or
	 * only those missing from the index, depending on the case).
	 */
	do_action( 'relevanssi_pre_indexing_query' );

	$count = $wpdb->get_var( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared

	// get_var() yields a numeric string (or null when there is no result), so
	// cast to honour the documented integer return type. Empty values become 0.
	return (int) $count;
}
/**
 * Generates the indexing query.
 *
 * Generates the query that fetches the list of posts to index. The parameters
 * are assumed to be safely escaped: they are interpolated into the SQL as-is.
 * In regular use, the values are generated by Relevanssi functions.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array,
 * used for table names.
 *
 * @param string  $valid_status Comma-separated list of valid post statuses.
 * @param boolean $extend       If true, only care about posts missing from the
 * index. If false, take all posts. Default false.
 * @param string  $restriction  Query restrictions, MySQL code that restricts
 * the posts fetched in the desired way. Default ''.
 * @param string  $limit        MySQL code to set the LIMIT and OFFSET values.
 * Default ''.
 *
 * @return string MySQL query to fetch the posts.
 */
function relevanssi_generate_indexing_query( $valid_status, $extend = false, $restriction = '', $limit = '' ) {
	global $wpdb, $relevanssi_variables;
	$relevanssi_table = $relevanssi_variables['relevanssi_table'];

	// Unless image indexing is switched on, leave image attachments out.
	if ( 'off' === get_option( 'relevanssi_index_image_files', 'off' ) ) {
		$restriction .= "
		AND post.ID NOT IN (
		SELECT ID FROM $wpdb->posts WHERE post_type = 'attachment'
		AND post_mime_type LIKE 'image%' )
		";
	}

	/**
	 * Filters the WHERE restriction for indexing queries.
	 *
	 * This filter hook can be used to exclude posts from indexing as early as
	 * is possible.
	 *
	 * @since 4.0.9 / 2.1.5
	 *
	 * @param array $restriction An array with two values: 'mysql' for the MySQL
	 * query restriction to modify, 'reason' for the reason of restriction.
	 */
	$restriction = apply_filters(
		'relevanssi_indexing_restriction',
		array(
			'mysql'  => $restriction,
			'reason' => '',
		)
	);

	/**
	 * Backwards compatibility for the change in filter parameters in Premium
	 * 2.8.0 in March 2020: older filter functions return a plain MySQL string.
	 * Only the MySQL part is needed in this function, so extract it whatever
	 * shape the filter returned. (Assigning an array key on the string itself
	 * does not work: strings do not accept non-numeric offsets.)
	 */
	if ( is_string( $restriction ) ) {
		$restriction_sql = $restriction;
	} elseif ( is_array( $restriction ) && isset( $restriction['mysql'] ) ) {
		$restriction_sql = $restriction['mysql'];
	} else {
		// The filter returned something unusable; apply no extra restriction.
		$restriction_sql = '';
	}

	if ( ! $extend ) {
		// All posts: valid status, or 'inherit' posts whose parent is valid
		// (or which have no parent), minus posts explicitly hidden.
		$q = "SELECT post.ID
		FROM $wpdb->posts post
		LEFT JOIN $wpdb->posts parent ON (post.post_parent=parent.ID)
		WHERE
			(post.post_status IN ($valid_status)
			OR
			(post.post_status='inherit'
				AND(
					(parent.ID is not null AND (parent.post_status IN ($valid_status)))
					OR (post.post_parent=0)
				)
			))
		AND post.ID NOT IN (SELECT post_id FROM $wpdb->postmeta WHERE meta_key = '_relevanssi_hide_post' AND meta_value = 'on')
		$restriction_sql ORDER BY post.ID DESC $limit";
	} else {
		// Only posts without rows in the index table. When internal links are
		// indexed, posts that only have link rows count as missing too.
		$processed_post_filter = 'r.doc is null';
		if ( 'noindex' !== get_option( 'relevanssi_internal_links', 'noindex' ) ) {
			$processed_post_filter = "(r.doc is null OR r.doc NOT IN (SELECT DISTINCT(doc) FROM $relevanssi_table WHERE link = 0))";
		}

		$q = "SELECT post.ID
		FROM $wpdb->posts post
		LEFT JOIN $wpdb->posts parent ON (post.post_parent=parent.ID)
		LEFT JOIN $relevanssi_table r ON (post.ID=r.doc)
		WHERE
		$processed_post_filter
		AND
			(post.post_status IN ($valid_status)
			OR
			(post.post_status='inherit'
				AND(
					(parent.ID is not null AND (parent.post_status IN ($valid_status)))
					OR (post.post_parent=0)
				)
			)
		)
		AND post.ID NOT IN (SELECT post_id FROM $wpdb->postmeta WHERE meta_key = '_relevanssi_hide_post' AND meta_value = 'on')
		$restriction_sql ORDER BY post.ID DESC $limit";
	}

	/**
	 * Filters the Relevanssi indexing query.
	 *
	 * @param string $q The indexing MySQL query.
	 */
	return apply_filters( 'relevanssi_indexing_query', $q );
}
/**
 * Builds the post type part of the indexing query.
 *
 * Reads the 'relevanssi_index_post_types' option and turns the post types that
 * actually exist into a MySQL "AND post.post_type IN (...)" clause. When no
 * usable post type is left, a placeholder type is used so that the clause
 * matches no posts.
 *
 * @return string MySQL code for the post type restriction.
 */
function relevanssi_post_type_restriction() {
	$configured = get_option( 'relevanssi_index_post_types' );
	$quoted     = array();

	if ( is_array( $configured ) ) {
		foreach ( $configured as $candidate ) {
			// 'bogus' is a placeholder Relevanssi stores so that the setting is
			// saved even when empty; skip it, along with anything that is not
			// a registered post type.
			if ( 'bogus' !== $candidate && post_type_exists( $candidate ) ) {
				$quoted[] = "'$candidate'";
			}
		}
	}

	if ( ! $quoted ) {
		$quoted = array( "'no_post_types_chosen_so_index_no_posts'" );
	}

	return ' AND post.post_type IN (' . implode( ', ', $quoted ) . ') ';
}
/**
 * Returns the post statuses Relevanssi indexes.
 *
 * The default statuses are 'publish', 'draft', 'private', 'pending' and
 * 'future'. The 'relevanssi_valid_status' filter hook can be used to modify
 * the list, for example to add a custom post status.
 *
 * @param boolean $return_array If true, return the filtered value as it is;
 * default false, return a string.
 *
 * @return string|array A comma-separated list of escaped and quoted post
 * statuses ready for MySQL, or the unprocessed filter result, depending on the
 * $return_array parameter.
 */
function relevanssi_valid_status_array( $return_array = false ) {
	/**
	 * Filters the valid status array.
	 *
	 * Allows you to modify the array that contains all valid post statuses for
	 * Relevanssi post indexing.
	 *
	 * @return array Array of post statuses.
	 */
	$statuses = apply_filters(
		'relevanssi_valid_status',
		array( 'publish', 'draft', 'private', 'pending', 'future' )
	);

	if ( $return_array ) {
		return $statuses;
	}

	if ( ! is_array( $statuses ) || 0 === count( $statuses ) ) {
		// The filter produced nothing usable: fall back to the defaults.
		return "'publish', 'draft', 'private', 'pending', 'future'";
	}

	$quoted = array_map(
		function ( $status ) {
			return "'" . esc_sql( $status ) . "'";
		},
		$statuses
	);

	return implode( ',', $quoted );
}
/**
 * Builds the index.
 *
 * Optionally truncates the index, fetches the posts chosen by the indexing
 * query and passes each of them to relevanssi_index_doc().
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array,
 * used for table names.
 *
 * @param boolean|int $extend_offset If numeric, offsets the indexing by that
 * amount. If true, doesn't truncate the index before indexing. If false,
 * truncates index before indexing. Default false.
 * @param boolean     $verbose       Not used anymore, kept for backwards
 * compatibility.
 * @param int         $post_limit    How many posts to index. Default null, no
 * limit.
 * @param boolean     $is_ajax       If true, indexing is done in AJAX context.
 * Default false.
 *
 * @return array In AJAX context, returns array with two values:
 * 'indexing_complete' tells whether indexing is completed or not, and 'indexed'
 * returns the number of posts indexed. Outside AJAX context, these values are
 * returned as an array in format of array(completed, posts indexed).
 */
function relevanssi_build_index( $extend_offset = false, $verbose = null, $post_limit = null, $is_ajax = false ) {
	global $wpdb, $relevanssi_variables;
	$relevanssi_table = $relevanssi_variables['relevanssi_table'];

	// Thanks to Julien Mession. This speeds up indexing a lot.
	wp_suspend_cache_addition( true );

	if ( false === $extend_offset ) {
		// Truncate the index first.
		relevanssi_truncate_index();

		// Taxonomy term, user profile and post type archive indexing.
		if ( function_exists( 'relevanssi_premium_indexing' ) ) {
			relevanssi_premium_indexing();
		}

		update_option( 'relevanssi_index', '', false );
	}

	// Provides the 'extend', 'limit' and 'size' values used below.
	$indexing_query_args = relevanssi_indexing_query_args( $extend_offset, $post_limit );

	// The values generated by these functions are safe to use for MySQL.
	$restriction  = relevanssi_post_type_restriction();
	$valid_status = relevanssi_valid_status_array();

	$query = relevanssi_generate_indexing_query(
		$valid_status,
		$indexing_query_args['extend'],
		$restriction,
		$indexing_query_args['limit']
	);

	/* This action documented earlier in lib/indexing.php. */
	do_action( 'relevanssi_pre_indexing_query' );

	$content = $wpdb->get_results( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
	if ( ! is_array( $content ) ) {
		// Without a result set there is nothing to loop over or count.
		$content = array();
	}

	// Stays null unless a WP CLI progress bar is actually created, so the
	// checks below never read an undefined variable.
	$progress = null;
	if ( defined( 'WP_CLI' ) && WP_CLI && function_exists( 'relevanssi_generate_progress_bar' ) ) {
		// @codeCoverageIgnoreStart
		$progress = relevanssi_generate_progress_bar( 'Indexing posts', count( $content ) );
		// @codeCoverageIgnoreEnd
	}

	$custom_fields      = relevanssi_get_custom_fields();
	$n                  = 0;
	$remove_first       = false;
	$bypass_global_post = true; // Set to true, because at this point global
	// $post should be null, but in some cases it is not.

	foreach ( $content as $post ) {
		$result = relevanssi_index_doc( $post->ID, $remove_first, $custom_fields, $bypass_global_post );
		if ( is_numeric( $result ) && $result > 0 ) {
			// $n calculates the number of posts indexed.
			$n++;
		}
		if ( $progress ) {
			// @codeCoverageIgnoreStart
			$progress->tick();
			// @codeCoverageIgnoreEnd
		}
	}

	if ( $progress ) {
		// @codeCoverageIgnoreStart
		$progress->finish();
		// @codeCoverageIgnoreEnd
	}

	// To prevent empty indices.
	$wpdb->query( "ANALYZE TABLE $relevanssi_table" ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared

	// The run is complete when there is no batch size, or when the batch came
	// back smaller than requested.
	$complete = false;
	$size     = $indexing_query_args['size'];

	if ( ( 0 === $size ) || ( count( $content ) < $size ) ) {
		$complete = true;
		update_option( 'relevanssi_indexed', 'done', false );

		// The document count update is disabled in this variant of the file.
		// relevanssi_async_update_doc_count();
	}

	wp_suspend_cache_addition( false );

	if ( $is_ajax ) {
		return array(
			'indexing_complete' => $complete,
			'indexed'           => $n,
		);
	}

	return array( $complete, $n );
}
/**
 * Indexes one document.
 *
 * Different cases:
 *
 * Build index:
 * - global $post is null, $index_post is a post object.
 *
 * Update post:
 * - global $post has the original $post, $index_post is the ID of revision.
 *
 * Quick edit:
 * - global $post is an array, $index_post is the ID of current revision.
 *
 * The global $post is replaced with the post being indexed while the function
 * runs and is put back to its previous value before returning.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array,
 * used for table names.
 * @global object $post                 The global post object.
 *
 * @param object|int $index_post         The post to index, either post object
 * or post ID.
 * @param boolean    $remove_first       If true, remove the post from the index
 * before indexing. Default false.
 * @param array      $custom_fields      The custom fields that are indexed for
 * the post. Default an empty string.
 * @param boolean    $bypass_global_post If true, do not use the global $post
 * object. Default false.
 * @param boolean    $debug              If true, echo out debugging
 * information. Default false.
 *
 * @return string|int The sum of the values returned by the indexing helper
 * functions, or -1 if the indexing fails, or 'hide' in case the post is hidden
 * or 'donotindex' if the post type setting or a filter blocks the indexing.
 */
function relevanssi_index_doc( $index_post, $remove_first = false, $custom_fields = '', $bypass_global_post = false, $debug = false ) {
	global $wpdb, $post, $relevanssi_variables;
	$relevanssi_table = $relevanssi_variables['relevanssi_table'];
	$post_was_null    = true;
	$previous_post    = null;

	// Check if this is a Jetpack Contact Form entry.
	if ( isset( $_REQUEST['contact-form-id'] ) ) { // phpcs:ignore WordPress.Security.NonceVerification
		return -1;
	}

	// Remember the current global $post so that it can be restored on exit.
	if ( isset( $post ) ) {
		$post_was_null = false;
		$previous_post = $post;
	}

	if ( empty( $post ) || $bypass_global_post ) {
		$post = is_object( $index_post ) ? $index_post : get_post( $index_post ); // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
	}

	// Finally fetch the post again by ID. Complicated, yes, but unless we do this,
	// we might end up indexing the post before the updates come in.
	$post = isset( $post->ID ) ? get_post( $post->ID ) : null; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited

	if ( null === $post ) {
		// At this point we should have something in $post; if not, quit.
		if ( $previous_post || $post_was_null ) {
			$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
		}
		return -1;
	}

	// Post exclusion feature from Relevanssi Premium.
	if ( function_exists( 'relevanssi_hide_post' ) ) {
		if ( relevanssi_hide_post( $post->ID ) ) {
			if ( $debug ) {
				relevanssi_debug_echo( 'relevanssi_hide_post() returned true.' );
			}
			// Store the reason while $post still is the post being indexed.
			// Once the global is restored, $post is the previous post (or
			// null), and the meta would go to the wrong place.
			update_post_meta(
				$post->ID,
				'_relevanssi_noindex_reason',
				__( 'Relevanssi index exclude', 'relevanssi' )
			);
			if ( $previous_post || $post_was_null ) {
				$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
			}
			return 'hide';
		}
	}

	$index_this_post        = false;
	$post->indexing_content = true;

	// Only post types chosen in the settings are indexed.
	$index_types = get_option( 'relevanssi_index_post_types', array() );
	if ( is_array( $index_types ) && in_array( $post->post_type, $index_types, true ) ) {
		$index_this_post = true;
	}

	/**
	 * Filters whether a post is indexed or not.
	 *
	 * Allows you to filter whether a post is indexed or not.
	 *
	 * @param boolean|string If not false, the post is not indexed. The value
	 * can be a boolean, or a string containing an explanation for the
	 * exclusion. Default false.
	 * @param int            The post ID.
	 */
	$do_not_index = apply_filters( 'relevanssi_do_not_index', false, $post->ID );
	if ( $do_not_index ) {
		// Filter says no.
		if ( true === $do_not_index ) {
			$do_not_index = __( 'Blocked by a filter function', 'relevanssi' );
		}
		if ( $debug ) {
			relevanssi_debug_echo( 'relevanssi_do_not_index says exclude, because: ' . $do_not_index );
		}
		update_post_meta( $post->ID, '_relevanssi_noindex_reason', $do_not_index );
		$index_this_post = false;
	}

	if ( $remove_first ) {
		// We are updating a post, so remove the old stuff first.
		relevanssi_remove_doc( $post->ID, true );
		if ( function_exists( 'relevanssi_remove_item' ) ) {
			relevanssi_remove_item( $post->ID, 'post' );
		}
		if ( $debug ) {
			relevanssi_debug_echo( 'Removed the post from the index.' );
		}
	}

	// This needs to be here, after the call to relevanssi_remove_doc(), because
	// otherwise a post that's in the index but shouldn't be there won't get removed.
	if ( ! $index_this_post ) {
		if ( $previous_post || $post_was_null ) {
			$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
		}
		return 'donotindex';
	}

	$n = 0;

	/**
	 * Allows filtering the indexed post before indexing.
	 *
	 * @param object $post The post object.
	 * @param object $post The post object again (in other uses for this filter,
	 * the second parameter actually makes sense).
	 */
	$post = apply_filters( 'relevanssi_post_to_index', $post, $post ); // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited

	$min_word_length = get_option( 'relevanssi_min_word_length', 3 );
	$insert_data     = array();

	if ( 'none' !== get_option( 'relevanssi_index_comments' ) ) {
		$n += relevanssi_index_comments( $insert_data, $post->ID, $min_word_length, $debug );
	}

	$taxonomies = get_option( 'relevanssi_index_taxonomies_list', array() );
	if ( is_array( $taxonomies ) ) {
		// The option may hold a non-array value; only loop over a real list.
		foreach ( $taxonomies as $taxonomy ) {
			$n += relevanssi_index_taxonomy_terms( $insert_data, $post->ID, $taxonomy, $debug );
		}
	}

	if ( 'on' === get_option( 'relevanssi_index_author' ) ) {
		$n += relevanssi_index_author( $insert_data, $post->post_author, $min_word_length, $debug );
	}

	$n += relevanssi_index_custom_fields( $insert_data, $post->ID, $custom_fields, $min_word_length, $debug );

	if (
		isset( $post->post_excerpt )
		&& ( 'on' === get_option( 'relevanssi_index_excerpt' ) || 'attachment' === $post->post_type )
	) {
		// Attachment caption is stored in the excerpt.
		$n += relevanssi_index_excerpt( $insert_data, $post->post_excerpt, $min_word_length, $debug );
	}

	// Premium can index arbitrary MySQL columns.
	if ( function_exists( 'relevanssi_index_mysql_columns' ) ) {
		if ( $debug ) {
			relevanssi_debug_echo( 'Indexing MySQL columns.' );
		}
		$insert_data = relevanssi_index_mysql_columns( $insert_data, $post->ID );
	}

	// Premium can index PDF content for the parent post.
	if ( function_exists( 'relevanssi_index_pdf_for_parent' ) ) {
		if ( $debug ) {
			relevanssi_debug_echo( 'Indexing PDF content for parent post.' );
		}
		$insert_data = relevanssi_index_pdf_for_parent( $insert_data, $post->ID );
	}

	$n += relevanssi_index_title( $insert_data, $post, $min_word_length, $debug );
	$n += relevanssi_index_content( $insert_data, $post, $min_word_length, $debug );

	// Turn the collected data into VALUES rows and insert them in one query.
	$values = relevanssi_convert_data_to_values( $insert_data, $post );

	if ( ! empty( $values ) ) {
		$values = implode( ', ', $values );
		$query  = "INSERT IGNORE INTO $relevanssi_table (doc, term, term_reverse, content, title, comment, tag, link, author, category, excerpt, taxonomy, customfield, type, taxonomy_detail, customfield_detail, mysqlcolumn) VALUES $values";
		if ( $debug ) {
			relevanssi_debug_echo( "Final indexing query:\n\t$query" );
		}
		$wpdb->query( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
	}

	// The post was indexed, so any earlier exclusion reason is outdated.
	delete_post_meta( $post->ID, '_relevanssi_noindex_reason' );

	if ( $previous_post || $post_was_null ) {
		$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
	}

	return $n;
}
/**
 * Index taxonomy terms for given post and given taxonomy.
 *
 * Collects the names of the terms the post has in the taxonomy, tokenizes them
 * and adds the token counts to the insert data: under 'tag' for 'post_tag',
 * 'category' for 'category' and 'taxonomy' for everything else, plus a
 * JSON-encoded per-taxonomy breakdown under 'taxonomy_detail'.
 *
 * @since 1.8
 *
 * @param array   $insert_data Insert query data array, modified here.
 * @param int     $post_id     The indexed post ID.
 * @param string  $taxonomy    Taxonomy name.
 * @param boolean $debug       If true, print out debugging notices.
 *
 * @return int The number of new tokens added.
 */
function relevanssi_index_taxonomy_terms( &$insert_data, $post_id, $taxonomy, $debug ) {
	if ( $debug ) {
		relevanssi_debug_echo( "Indexing taxonomy terms for $taxonomy" );
	}

	$n               = 0;
	$min_word_length = get_option( 'relevanssi_min_word_length', 3 );

	$post_taxonomy_terms = get_the_terms( $post_id, $taxonomy );
	if ( ! is_array( $post_taxonomy_terms ) ) {
		// No terms (false) or a failure (an error object): nothing to index.
		return $n;
	}

	$term_string = '';
	$post_term   = null; // Defined even if the term list turns out to be empty.
	foreach ( $post_taxonomy_terms as $post_term ) {
		if ( is_object( $post_term ) ) {
			$term_string .= $post_term->name . ' ';
		}
	}

	if ( $debug ) {
		relevanssi_debug_echo( "Taxonomy term content for $taxonomy: $term_string" );
	}

	/**
	 * Filters the taxonomy term content before indexing.
	 *
	 * @param string      The taxonomy term content.
	 * @param object|null The last term read from the term list.
	 * @param string      The taxonomy.
	 * @param int         The post ID.
	 */
	$term_string = apply_filters( 'relevanssi_tag_before_tokenize', trim( $term_string ), $post_term, $taxonomy, $post_id );

	/** This filter is documented in lib/indexing.php */
	$term_tokens = apply_filters(
		'relevanssi_indexing_tokens',
		relevanssi_tokenize( $term_string, true, $min_word_length ),
		'taxonomy-' . $taxonomy
	);

	if ( ! is_array( $term_tokens ) ) {
		return $n;
	}

	// The target column depends only on the taxonomy, so resolve it once.
	switch ( $taxonomy ) {
		case 'post_tag':
			$type = 'tag';
			break;
		case 'category':
			$type = 'category';
			break;
		default:
			$type = 'taxonomy';
	}

	foreach ( $term_tokens as $token => $count ) {
		$n++;

		$insert_data[ $token ][ $type ] = isset( $insert_data[ $token ][ $type ] )
			? $insert_data[ $token ][ $type ] + $count
			: $count;

		$tax_detail = array();
		if ( isset( $insert_data[ $token ]['taxonomy_detail'] ) ) {
			$tax_detail = json_decode( $insert_data[ $token ]['taxonomy_detail'], true );
			if ( ! is_array( $tax_detail ) ) {
				// Undecodable detail data: start the breakdown over.
				$tax_detail = array();
			}
		}

		$tax_detail[ $taxonomy ] = isset( $tax_detail[ $taxonomy ] )
			? $tax_detail[ $taxonomy ] + $count
			: $count;

		$insert_data[ $token ]['taxonomy_detail'] = wp_json_encode( $tax_detail );
	}

	return $n;
}
/**
 * Updates child posts when a parent post changes status.
 *
 * Called from 'transition_post_status' action hook when a post is edited,
 * published, or deleted. When the parent moves between an indexed and a
 * non-indexed status, the child posts are indexed or removed from the index
 * accordingly.
 *
 * @author renaissancehack
 *
 * @param string $new_status The new status.
 * @param string $old_status The old status.
 * @param object $post       The post object.
 *
 * @return null|array Null if there is no usable post object, otherwise an
 * array of 'removed' and 'indexed' values that show how many posts were
 * removed and indexed.
 */
function relevanssi_update_child_posts( $new_status, $old_status, $post ) {
	// Safety check, for WordPress Editorial Calendar incompatibility.
	if ( ! isset( $post, $post->ID ) ) {
		return;
	}

	/** Documented in lib/indexing.php. */
	$index_statuses = apply_filters(
		'relevanssi_valid_status',
		array( 'publish', 'private', 'draft', 'pending', 'future' )
	);

	/**
	 * Filters the attachment and revision post types.
	 *
	 * If you want attachment indexing to cover other post types than just
	 * attachment, you need to include the new post type in the array with
	 * this filter.
	 *
	 * @param array Array of post types, default 'attachment' and 'revision'.
	 */
	$attachment_revision_types = apply_filters(
		'relevanssi_index_attachment_revision_types',
		array( 'attachment', 'revision' )
	);

	$tally = array(
		'removed' => 0,
		'indexed' => 0,
	);

	// Nothing to do when the status did not change, when the post stays within
	// the indexed statuses, or when the post is an attachment or a revision.
	if (
		$new_status === $old_status
		|| ( in_array( $new_status, $index_statuses, true ) && in_array( $old_status, $index_statuses, true ) )
		|| in_array( $post->post_type, $attachment_revision_types, true )
	) {
		return $tally;
	}

	$children = get_children(
		array(
			'post_parent' => $post->ID,
			'post_type'   => get_option( 'relevanssi_index_post_types' ),
		)
	);

	if ( empty( $children ) ) {
		return $tally;
	}

	// The new parent status decides the fate of every child.
	$parent_is_indexed = in_array( $new_status, $index_statuses, true );
	foreach ( $children as $child ) {
		if ( $parent_is_indexed ) {
			relevanssi_publish( $child->ID );
			$tally['indexed']++;
		} else {
			relevanssi_remove_doc( $child->ID );
			$tally['removed']++;
		}
	}

	return $tally;
}
/**
 * Indexes a published post.
 *
 * Auto drafts are skipped. Every other post is handed to
 * relevanssi_index_doc(), which is told to remove the post from the index
 * before indexing it again.
 *
 * @param int     $post_id            The post ID.
 * @param boolean $bypass_global_post If true, bypass the global $post object.
 * Default false.
 *
 * @return string|int Returns 'auto-draft' if the post is an auto draft and
 * thus skipped, or the relevanssi_index_doc() return value.
 *
 * @see relevanssi_index_doc()
 */
function relevanssi_publish( $post_id, $bypass_global_post = false ) {
	if ( 'auto-draft' === get_post_status( $post_id ) ) {
		return 'auto-draft';
	}

	return relevanssi_index_doc(
		$post_id,
		true, // Remove the existing index data for the post first.
		relevanssi_get_custom_fields(),
		$bypass_global_post
	);
}
/**
 * Indexes a post after publishing or modification.
 *
 * Hooks on to 'wp_insert_post' action hook and triggers when wp_insert_post()
 * is used to add a post into the database. Doesn't use the global $post object,
 * because that doesn't have the correct post.
 *
 * The post is indexed if its status (for 'inherit' posts, the status of the
 * parent) is a valid status and the post passes the
 * 'relevanssi_indexing_restriction' filter; otherwise it is removed from the
 * index and the reason is stored in the '_relevanssi_noindex_reason' meta.
 *
 * @author Lumpysimon.
 *
 * @global object $wpdb The WP database interface.
 *
 * @param int $post_id The post ID.
 *
 * @return string|int Returns 'auto-draft' if the post is an auto draft and
 * thus skipped, 'revision' for revisions, 'removed' if the post is removed or
 * the relevanssi_index_doc() return value from relevanssi_publish().
 *
 * @see relevanssi_publish()
 */
function relevanssi_insert_edit( $post_id ) {
	global $wpdb;

	if ( 'revision' === relevanssi_get_post_type( $post_id ) ) {
		return 'revision';
	}

	$post_status = get_post_status( $post_id );
	if ( 'auto-draft' === $post_status ) {
		return 'auto-draft';
	}

	if ( 'inherit' === $post_status ) {
		// Get the post status from the parent post.
		$parent_id   = wp_get_post_parent_id( $post_id );
		$post_status = get_post_status( $parent_id );
	}

	$index_this_post = true;

	/* Documented in lib/indexing.php. */
	$restriction = apply_filters(
		'relevanssi_indexing_restriction',
		array(
			'mysql'  => '',
			'reason' => '',
		)
	);

	// Same backwards compatibility as in relevanssi_generate_indexing_query():
	// older filter functions return the MySQL restriction as a plain string.
	if ( is_string( $restriction ) ) {
		$restriction = array(
			'mysql'  => $restriction,
			'reason' => 'relevanssi_indexing_restriction filter',
		);
	}
	$has_parts          = is_array( $restriction );
	$restriction_sql    = $has_parts && isset( $restriction['mysql'] ) ? $restriction['mysql'] : '';
	$restriction_reason = $has_parts && isset( $restriction['reason'] ) ? $restriction['reason'] : '';

	if ( ! empty( $restriction_sql ) ) {
		// Check the indexing restriction filter: if the post passes the filter,
		// this should return the post ID. The ID is cast to an integer, because
		// it is placed in the query as it is.
		$safe_post_id    = (int) $post_id;
		$is_unrestricted = $wpdb->get_var(
			"SELECT ID FROM $wpdb->posts AS post WHERE ID = $safe_post_id $restriction_sql" // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
		);
		if ( ! $is_unrestricted ) {
			$index_this_post = false;
		}
	}

	$return_array   = true;
	$index_statuses = relevanssi_valid_status_array( $return_array );
	if ( ! in_array( $post_status, $index_statuses, true ) ) {
		$index_this_post = false;
	}

	if ( $index_this_post ) {
		$bypass_global_post = true;
		// The document count update (relevanssi_async_update_doc_count()) that
		// would follow a successful indexing is disabled in this variant of the
		// file.
		return relevanssi_publish( $post_id, $bypass_global_post );
	}

	// The post isn't supposed to be indexed anymore, remove it from index.
	relevanssi_remove_doc( $post_id );
	update_post_meta(
		$post_id,
		'_relevanssi_noindex_reason',
		trim( $restriction_reason )
	);

	return 'removed';
}
/**
 * Updates comment indexing when comments are added, edited or deleted.
 *
 * Reindexes the post the comment belongs to, provided that comment indexing is
 * enabled ('normal' or 'all'), the comment exists and is approved, and the
 * comment is not a pingback when the indexing type is 'normal'.
 *
 * @author OdditY
 *
 * @param int $comment_id Comment ID.
 *
 * @see relevanssi_comment_remove
 * @see relevanssi_comment_edit
 * @see relevanssi_publish
 *
 * @return int|string The relevanssi_publish() return value, "nocommentfound"
 * if the comment doesn't exist or "donotindex" if the comment cannot be
 * indexed or comment indexing is disabled.
 */
function relevanssi_index_comment( $comment_id ) {
	$indexing_mode = get_option( 'relevanssi_index_comments' );
	if ( ! in_array( $indexing_mode, array( 'normal', 'all' ), true ) ) {
		return 'donotindex';
	}

	$comment = get_comment( $comment_id );
	if ( ! $comment ) {
		return 'nocommentfound';
	}

	// In 'normal' mode pingbacks are left out; unapproved comments are never
	// indexed.
	$skip_pingbacks = ( 'normal' === $indexing_mode );
	if (
		( $skip_pingbacks && 'pingback' === $comment->comment_type )
		|| 1 !== intval( $comment->comment_approved )
	) {
		return 'donotindex';
	}

	return relevanssi_publish( $comment->comment_post_ID );
}
/**
 * Returns the comment text for a post.
 *
 * Goes through the approved comments of the post in batches and joins the
 * (filterable) author names and comment contents into one string.
 *
 * @param int $post_id The post ID.
 *
 * @return string All the comment content as a string that has the comment
 * author and the comment text. Empty if comment indexing is disabled or
 * blocked for the post.
 */
function relevanssi_get_comments( $post_id ) {
	/**
	 * If this filter returns true, the comments for the post are not indexed.
	 *
	 * @param boolean Return true to block the comment indexing. Default false.
	 * @param int     $post_id The post ID.
	 */
	if ( apply_filters( 'relevanssi_index_comments_exclude', false, $post_id ) ) {
		return '';
	}

	$indexing_mode = get_option( 'relevanssi_index_comments' );
	if ( 'none' === $indexing_mode ) {
		return '';
	}

	// With the 'all' setting, pings are fetched along with regular comments.
	$types = ( 'all' === $indexing_mode )
		? array( 'comment', 'pings' )
		: array( 'comment' );

	$collected  = '';
	$batch_size = 20;

	// Posts may have lots of comments. Do 20 at the time to avoid memory
	// issues, and stop at the first empty batch.
	for ( $offset = 0; ; $offset += $batch_size ) {
		/**
		 * Filters the arguments for get_approved_comments().
		 *
		 * Useful for indexing custom comment types, for example.
		 *
		 * @param array An array of arguments. Don't adjust 'offset' or
		 * 'number' to avoid problems.
		 */
		$query_args = apply_filters(
			'relevanssi_get_approved_comments_args',
			array(
				'offset' => $offset,
				'number' => $batch_size,
				'type'   => $types,
			)
		);

		$batch = get_approved_comments( $post_id, $query_args );
		if ( 0 === count( $batch ) ) {
			break;
		}

		foreach ( $batch as $comment ) {
			/**
			 * Filters the comment author before indexing.
			 *
			 * @param string Comment author display name.
			 * @param int    The comment ID.
			 */
			$author = apply_filters( 'relevanssi_comment_author_to_index', $comment->comment_author, $comment->comment_ID );

			/**
			 * Filters the comment content before indexing.
			 *
			 * @param string Comment content.
			 * @param int    The comment ID.
			 */
			$content = apply_filters( 'relevanssi_comment_content_to_index', $comment->comment_content, $comment->comment_ID );

			$collected .= ' ' . $author . ' ' . $content;
		}
	}

	return $collected;
}
/**
 * Empties the Relevanssi index table.
 *
 * Runs a TRUNCATE TABLE query on the table named in
 * $relevanssi_variables['relevanssi_table'] and returns the result of
 * $wpdb->query() as is.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 *
 * @return boolean True on success, false on failure.
 */
function relevanssi_truncate_index() {
	global $wpdb, $relevanssi_variables;

	$truncate_query = 'TRUNCATE TABLE ' . $relevanssi_variables['relevanssi_table'];

	return $wpdb->query( $truncate_query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
}
/**
 * Removes a post from the Relevanssi index.
 *
 * If relevanssi_premium_remove_doc() exists, the removal is delegated to it.
 * Otherwise all index rows where the 'doc' column matches the post ID are
 * deleted. Nothing is done if the post ID evaluates to 0.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 *
 * @param int     $post_id             The post ID.
 * @param boolean $keep_internal_links If true, keep internal link indexing (a
 * Premium feature). Default false.
 */
function relevanssi_remove_doc( $post_id, $keep_internal_links = false ) {
	global $wpdb, $relevanssi_variables;

	if ( function_exists( 'relevanssi_premium_remove_doc' ) ) {
		// Premium has a different method, because the index can include taxonomy
		// terms and user profiles.
		relevanssi_premium_remove_doc( $post_id, $keep_internal_links );
		return;
	}

	$doc_id = intval( $post_id );
	if ( 0 === $doc_id ) {
		// No post ID specified.
		return;
	}

	$delete_query = $wpdb->prepare(
		'DELETE FROM ' . $relevanssi_variables['relevanssi_table'] . ' WHERE doc=%d', // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
		$doc_id
	);

	// The doc count update (relevanssi_async_update_doc_count()) is disabled in
	// this version of the file, so the number of deleted rows is not used.
	$wpdb->query( $delete_query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
}
/**
 * Sets the index 'type' column value to 'attachment' for attachment posts.
 *
 * For any other post type the incoming value is returned without changes.
 *
 * @param string $type The index 'type' column value, default 'post'.
 * @param object $post The post object containing the post being indexed.
 *
 * @return string The index 'type' column value, default 'post'.
 */
function relevanssi_index_get_post_type( $type, $post ) {
	return 'attachment' === $post->post_type ? 'attachment' : $type;
}
/**
 * Sets the indexing MySQL LIMIT parameter and other parameters.
 *
 * Numeric values are cast to integers before they are placed in the SQL
 * fragment, so values like '10.0' cannot produce an invalid LIMIT or OFFSET
 * clause. A negative offset is treated as 0.
 *
 * @param boolean|int $extend_offset If numeric, offsets the indexing by that
 * amount. If false, no offset is used and 'extend' is false. Any other value
 * (usually true) means the index is extended: 'extend' is set to true. Default
 * false.
 * @param int         $post_limit    How many posts to index. Default null, no
 * limit. When no limit is given and $extend_offset is not false, a limit of
 * 200 is used.
 *
 * @return array Array with the LIMIT clause in 'limit', the extend boolean in
 * 'extend' and the size integer in 'size'.
 */
function relevanssi_indexing_query_args( $extend_offset = false, $post_limit = null ) {
	$size   = 0;
	$limit  = '';
	$extend = false;

	// If $post_limit parameter is present, numeric and > 0, use that. The value
	// is cast to an integer first, so that only whole numbers end up in the SQL.
	$post_limit = is_numeric( $post_limit ) ? intval( $post_limit ) : 0;
	if ( $post_limit > 0 ) {
		$size  = $post_limit;
		$limit = " LIMIT $post_limit";
	}

	if ( false !== $extend_offset ) {
		if ( '' === $limit ) {
			// CLI request with no limit specified.
			$size  = 200;
			$limit = ' LIMIT 200';
		}

		if ( is_numeric( $extend_offset ) ) {
			// Extend stays false, because $limit now has LIMIT and OFFSET.
			$limit .= ' OFFSET ' . max( 0, intval( $extend_offset ) );
		} else {
			// Extending, so do not truncate and skip the posts already in the index.
			$extend = true;
		}
	}

	return array(
		'limit'  => $limit,
		'extend' => $extend,
		'size'   => $size,
	);
}
/**
 * Adds the comment tokens of a post to the INSERT query data.
 *
 * Fetches the comment text with relevanssi_get_comments(), strips the HTML,
 * tokenizes the text and stores each token count under the 'comment' key in
 * $insert_data.
 *
 * @param array   $insert_data     The INSERT query data. Modified here.
 * @param int     $post_id         The indexed post ID.
 * @param int     $min_word_length The minimum word length.
 * @param boolean $debug           If true, print out debug notices.
 *
 * @return int The number of tokens added to the data.
 */
function relevanssi_index_comments( &$insert_data, $post_id, $min_word_length, $debug ) {
	if ( $debug ) {
		relevanssi_debug_echo( 'Indexing comments.' );
	}

	$comment_text = relevanssi_get_comments( $post_id );
	if ( empty( $comment_text ) ) {
		// No comments, nothing to add.
		return 0;
	}

	// Replace tags with spaces before stripping them, so that the words on the
	// two sides of a tag are not joined together.
	$comment_text = relevanssi_strip_invisibles( $comment_text );
	$comment_text = preg_replace( '/<[a-zA-Z\/][^>]*>/', ' ', $comment_text );
	$comment_text = wp_strip_all_tags( $comment_text );

	if ( $debug ) {
		relevanssi_debug_echo( 'Comment content: ' . $comment_text );
	}

	$raw_tokens = relevanssi_tokenize( $comment_text, true, $min_word_length );
	/**
	 * Filters the indexing tokens before they are added to the $insert_data.
	 *
	 * @param array  An array of token-frequency pairs.
	 * @param string The context of the tokens (eg. 'content', 'title').
	 *
	 * @return array The filtered tokens.
	 */
	$comment_tokens = apply_filters( 'relevanssi_indexing_tokens', $raw_tokens, 'comments' );

	$added = 0;
	if ( count( $comment_tokens ) > 0 ) {
		foreach ( $comment_tokens as $token => $count ) {
			$insert_data[ $token ]['comment'] = $count;
			$added++;
		}
	}

	return $added;
}
/**
 * Adds the post author display name tokens to the INSERT query data.
 *
 * The display name is tokenized without stopword removal and the token counts
 * are added to the 'author' key in $insert_data.
 *
 * @param array   $insert_data     The INSERT query data. Modified here.
 * @param int     $post_author     The post author id.
 * @param int     $min_word_length The minimum word length.
 * @param boolean $debug           If true, print out debug notices.
 *
 * @return int The number of tokens added to the data.
 */
function relevanssi_index_author( &$insert_data, $post_author, $min_word_length, $debug ) {
	$author_name = get_the_author_meta( 'display_name', $post_author );
	$raw_tokens  = relevanssi_tokenize( $author_name, false, $min_word_length );

	/** This filter is documented in lib/indexing.php */
	$author_tokens = apply_filters( 'relevanssi_indexing_tokens', $raw_tokens, 'author' );

	if ( $debug ) {
		relevanssi_debug_echo( 'Indexing post author as: ' . implode( ' ', array_keys( $author_tokens ) ) );
	}

	$added = 0;
	foreach ( $author_tokens as $token => $count ) {
		$previous = isset( $insert_data[ $token ]['author'] )
			? $insert_data[ $token ]['author']
			: 0;

		$insert_data[ $token ]['author'] = $previous + $count;
		$added++;
	}

	return $added;
}
/**
 * Adds custom field tokens to the INSERT query data.
 *
 * Resolves the list of custom fields to index, reads the values with
 * get_post_meta(), tokenizes them and adds the token counts to the
 * 'customfield' key in $insert_data.
 *
 * @param array        $insert_data     The INSERT query data. Modified here.
 * @param int          $post_id         The indexed post ID.
 * @param string|array $custom_fields   The custom fields to index: an array of
 * field names, 'all' for every custom field of the post or 'visible' for those
 * fields that don't begin with an underscore.
 * @param int          $min_word_length The minimum word length.
 * @param boolean      $debug           If true, print out debug notices.
 *
 * @return int The number of tokens added to the data.
 */
function relevanssi_index_custom_fields( &$insert_data, $post_id, $custom_fields, $min_word_length, $debug ) {
	$added        = 0;
	$visible_only = 'visible' === $custom_fields;

	if ( in_array( $custom_fields, array( 'all', 'visible' ), true ) ) {
		$custom_fields = get_post_custom_keys( $post_id );
	}

	/**
	 * Filters the list of custom fields to index before indexing.
	 *
	 * @param array $custom_fields List of custom field names.
	 * @param int   $post_id       The post ID.
	 */
	$custom_fields = apply_filters( 'relevanssi_index_custom_fields', $custom_fields, $post_id );
	if ( ! is_array( $custom_fields ) ) {
		return 0;
	}

	$custom_fields = array_unique( $custom_fields );
	if ( $visible_only ) {
		// Drop the fields that begin with an underscore, but make an exception
		// for '_relevanssi_pdf_content'.
		$custom_fields = array_filter(
			$custom_fields,
			function( $field ) {
				$is_visible = '_relevanssi_pdf_content' === $field || '_' !== substr( $field, 0, 1 );
				return $is_visible ? $field : null;
			}
		);
	}

	// Premium includes some support for ACF repeater fields.
	if ( function_exists( 'relevanssi_add_repeater_fields' ) ) {
		relevanssi_add_repeater_fields( $custom_fields, $post_id );
	}

	// Remove the empty field names.
	$custom_fields = array_filter( $custom_fields );

	if ( $debug ) {
		relevanssi_debug_echo( 'Custom fields to index: ' . implode( ', ', $custom_fields ) );
	}

	foreach ( $custom_fields as $field ) {
		/**
		 * Filters the custom field value before indexing.
		 *
		 * @param array  Custom field values.
		 * @param string $field   The custom field name.
		 * @param int    $post_id The post ID.
		 */
		$values = apply_filters( 'relevanssi_custom_field_value', get_post_meta( $post_id, $field, false ), $field, $post_id );
		if ( ! is_array( $values ) || empty( $values ) ) {
			continue;
		}

		// '_relevanssi_pdf_content' is tokenized with the 'body' settings, all
		// the other fields as custom fields.
		$is_pdf_content = '_relevanssi_pdf_content' === $field;
		$context        = $is_pdf_content ? 'body' : 'custom_field';
		$remove_stops   = $is_pdf_content ? 'body' : true;

		foreach ( $values as $value ) {
			// Quick hack : allow indexing of PODS relationship custom fields // TMV.
			if ( is_array( $value ) && isset( $value['post_title'] ) ) {
				$value = $value['post_title'];
			}

			if ( function_exists( 'relevanssi_index_acf' ) ) {
				// @codeCoverageIgnoreStart
				// Handle ACF fields. Only defined when ACF is active.
				$acf_tokens = relevanssi_index_acf( $insert_data, $post_id, $field, $value );
				if ( $acf_tokens ) {
					// The value was handled as an ACF field, move to the next one.
					$added += $acf_tokens;
					continue;
				}
				// @codeCoverageIgnoreEnd
			}

			// Flatten other arrays.
			if ( is_array( $value ) ) {
				$value = relevanssi_flatten_array( $value );
			}

			if ( $debug ) {
				relevanssi_debug_echo( "\tKey: " . $field . ' - value: ' . $value );
			}

			/** This filter is documented in lib/indexing.php */
			$value_tokens = apply_filters(
				'relevanssi_indexing_tokens',
				relevanssi_tokenize( $value, $remove_stops, $min_word_length ),
				$context
			);

			foreach ( $value_tokens as $token => $count ) {
				$added++;

				$previous = isset( $insert_data[ $token ]['customfield'] )
					? $insert_data[ $token ]['customfield']
					: 0;

				$insert_data[ $token ]['customfield'] = $previous + $count;

				// Premium indexes more detail about custom fields.
				if ( function_exists( 'relevanssi_customfield_detail' ) ) {
					$insert_data = relevanssi_customfield_detail( $insert_data, $token, $count, $field );
				}
			}
		}
	}

	return $added;
}
/**
 * Adds the excerpt tokens to the INSERT query data.
 *
 * The excerpt is tokenized with stopword removal and the token counts are
 * added to the 'excerpt' key in $insert_data.
 *
 * @param array   $insert_data     The INSERT query data. Modified here.
 * @param string  $excerpt         The post excerpt to index.
 * @param int     $min_word_length The minimum word length.
 * @param boolean $debug           If true, print out debug notices.
 *
 * @return int The number of tokens added to the data.
 */
function relevanssi_index_excerpt( &$insert_data, $excerpt, $min_word_length, $debug ) {
	// Include excerpt for attachments which use post_excerpt for captions - modified by renaissancehack.
	if ( $debug ) {
		relevanssi_debug_echo( 'Indexing post excerpt: ' . $excerpt );
	}

	$raw_tokens = relevanssi_tokenize( $excerpt, true, $min_word_length );

	/** This filter is documented in lib/indexing.php */
	$excerpt_tokens = apply_filters( 'relevanssi_indexing_tokens', $raw_tokens, 'excerpt' );

	$added = 0;
	foreach ( $excerpt_tokens as $token => $count ) {
		$previous = isset( $insert_data[ $token ]['excerpt'] )
			? $insert_data[ $token ]['excerpt']
			: 0;

		$insert_data[ $token ]['excerpt'] = $previous + $count;
		$added++;
	}

	return $added;
}
/**
 * Adds the post title tokens to the INSERT query data.
 *
 * The title goes through the 'the_title' and
 * 'relevanssi_post_title_before_tokenize' filters, is tokenized and the token
 * counts are added to the 'title' key in $insert_data. Nothing is indexed if
 * the title is empty or the 'relevanssi_index_titles' filter returns false.
 *
 * @param array   $insert_data     The INSERT query data. Modified here.
 * @param object  $post            The post object.
 * @param int     $min_word_length The minimum word length.
 * @param boolean $debug           If true, print out debug notices.
 *
 * @return int The number of tokens added to the data.
 */
function relevanssi_index_title( &$insert_data, $post, $min_word_length, $debug ) {
	if ( empty( $post->post_title ) ) {
		return 0;
	}

	/**
	 * If this filter returns false, titles are not indexed at all.
	 *
	 * @param boolean Return false to prevent titles from being indexed. Default true.
	 */
	$index_titles = apply_filters( 'relevanssi_index_titles', true );
	if ( ! $index_titles ) {
		return 0;
	}

	if ( $debug ) {
		relevanssi_debug_echo( 'Indexing post title.' );
	}

	/** This filter is documented in wp-includes/post-template.php */
	$title = apply_filters( 'the_title', $post->post_title, $post->ID );

	/**
	 * Filters the title before tokenizing and indexing.
	 *
	 * @param string $post->post_title The title.
	 * @param object $post             The full post object.
	 */
	$title = apply_filters( 'relevanssi_post_title_before_tokenize', $title, $post );

	/**
	 * Filters whether stopwords should be removed from titles in tokenizing or not.
	 *
	 * @param boolean If true, remove stopwords. Default true.
	 */
	$remove_stopwords = apply_filters( 'relevanssi_remove_stopwords_in_titles', true );

	$raw_tokens = relevanssi_tokenize( $title, $remove_stopwords, $min_word_length );

	/** This filter is documented in lib/indexing.php */
	$title_tokens = apply_filters( 'relevanssi_indexing_tokens', $raw_tokens, 'title' );

	if ( $debug ) {
		relevanssi_debug_echo( "\tTitle, tokenized: " . implode( ' ', array_keys( $title_tokens ) ) );
	}

	$added = 0;
	foreach ( $title_tokens as $token => $count ) {
		$previous = isset( $insert_data[ $token ]['title'] )
			? $insert_data[ $token ]['title']
			: 0;

		$insert_data[ $token ]['title'] = $previous + $count;
		$added++;
	}

	return $added;
}
/**
 * Adds the post content tokens to the INSERT query data.
 *
 * Runs the post content through the indexing filters, expands or strips the
 * shortcodes depending on the 'relevanssi_expand_shortcodes' option, removes
 * the HTML, tokenizes the result and adds the token counts to the 'content'
 * key in $insert_data.
 *
 * @param array   $insert_data     The INSERT query data. Modified here.
 * @param object  $post            The post object.
 * @param int     $min_word_length The minimum word length.
 * @param boolean $debug           If true, print out debug notices.
 *
 * @return int The number of tokens added to the data.
 */
function relevanssi_index_content( &$insert_data, $post, $min_word_length, $debug ) {
	/**
	 * If this filter returns false, post content is not indexed at all.
	 *
	 * @param boolean Return false to prevent post content from being indexed. Default true.
	 */
	$index_content = apply_filters( 'relevanssi_index_content', true );
	if ( ! $index_content ) {
		return 0;
	}

	if ( $debug ) {
		relevanssi_debug_echo( 'Indexing post content.' );
	}

	// While indexing, [noindex] is handled by relevanssi_noindex_shortcode_indexing.
	remove_shortcode( 'noindex' );
	add_shortcode( 'noindex', 'relevanssi_noindex_shortcode_indexing' );

	/**
	 * Filters the post content before indexing.
	 *
	 * @param string $post->post_content The post content.
	 * @param object $post               The full post object.
	 */
	$text = apply_filters( 'relevanssi_post_content', $post->post_content, $post );
	if ( $debug ) {
		relevanssi_debug_echo( "\tPost content after relevanssi_post_content:\n" . $text );
	}

	/**
	 * Can be used to add extra content to the post before indexing.
	 *
	 * @author Alexander Gieg
	 *
	 * @param string The additional content.
	 * @param object $post The post object.
	 */
	$extra_text = trim( apply_filters( 'relevanssi_content_to_index', '', $post ) );
	if ( ! empty( $extra_text ) ) {
		$text .= ' ' . $extra_text;
		if ( $debug ) {
			relevanssi_debug_echo( "\tAdditional content from relevanssi_content_to_index:\n" . $extra_text );
		}
	}

	if ( 'on' !== get_option( 'relevanssi_expand_shortcodes' ) ) {
		$text = strip_shortcodes( $text );
	} else {
		// TablePress support.
		if ( function_exists( 'relevanssi_enable_tablepress_shortcodes' ) ) {
			$tablepress_controller = relevanssi_enable_tablepress_shortcodes();
		}

		relevanssi_disable_shortcodes();

		// Keep a copy of $post over the shortcode expansion and restore it after.
		$original_post = $post;
		$text          = do_shortcode( $text );
		$post          = $original_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited

		unset( $tablepress_controller );
	}

	// Indexing part is done, put the regular [noindex] handler back.
	remove_shortcode( 'noindex' );
	add_shortcode( 'noindex', 'relevanssi_noindex_shortcode' );

	/**
	 * Filters the post content after shortcodes but before HTML stripping.
	 *
	 * @param string $contents The post content.
	 * @param object $post     The full post object.
	 */
	$text = apply_filters( 'relevanssi_post_content_after_shortcodes', $text, $post );

	$text = relevanssi_strip_invisibles( $text );

	// Premium feature for better control over internal links.
	if ( function_exists( 'relevanssi_process_internal_links' ) ) {
		$text = relevanssi_process_internal_links( $text, $post->ID );
	}

	// Replace tags with spaces before stripping them, so that the words on the
	// two sides of a tag are not joined together.
	$text = preg_replace( '/<[a-zA-Z\/][^>]*>/', ' ', $text );
	$text = wp_strip_all_tags( $text );

	/**
	 * Filters the post content in indexing before tokenization.
	 *
	 * @param string $contents The post content.
	 * @param object $post     The full post object.
	 */
	$text = apply_filters( 'relevanssi_post_content_before_tokenize', $text, $post );

	$raw_tokens = relevanssi_tokenize( $text, 'body', $min_word_length );

	/** This filter is documented in lib/indexing.php */
	$content_tokens = apply_filters( 'relevanssi_indexing_tokens', $raw_tokens, 'content' );

	if ( $debug ) {
		relevanssi_debug_echo( "\tContent, tokenized:\n" . implode( ' ', array_keys( $content_tokens ) ) );
	}

	$added = 0;
	foreach ( $content_tokens as $token => $count ) {
		$previous = isset( $insert_data[ $token ]['content'] )
			? $insert_data[ $token ]['content']
			: 0;

		$insert_data[ $token ]['content'] = $previous + $count;
		$added++;
	}

	return $added;
}
/**
 * Disables problematic shortcode before Relevanssi indexing to avoid problems.
 *
 * Uses the `relevanssi_disabled_shortcodes` filter hook to filter the
 * shortcodes. The disabled shortcodes are first removed with
 * remove_shortcode() and then given a reference to `__return_empty_string`.
 *
 * The option `relevanssi_disable_shortcodes` is also supported for legacy
 * reasons, but it's better to use the filter instead. The option is a
 * comma-separated list of shortcode names.
 *
 * Shortcode names are trimmed before they are checked, so empty and
 * whitespace-only entries (for example from a trailing comma in the option)
 * are skipped instead of being passed to add_shortcode() as empty names.
 */
function relevanssi_disable_shortcodes() {
	$default_disables = array(
		'contact-form', // Jetpack Contact Form causes an error message.
		'starrater', // GD Star Rating rater shortcode causes problems.
		'responsive-flipbook', // Responsive Flipbook causes problems.
		'avatar_upload', // WP User Avatar is incompatible.
		'product_categories', // A problematic WooCommerce shortcode.
		'recent_products', // A problematic WooCommerce shortcode.
		'php', // PHP Code for Posts.
		'watupro', // Watu PRO doesn't co-operate.
		'starbox', // Starbox shortcode breaks Relevanssi.
		'cfdb-save-form-post', // Contact Form DB.
		'cfdb-datatable',
		'cfdb-table',
		'cfdb-json',
		'cfdb-value',
		'cfdb-count',
		'cfdb-html',
		'woocommerce_cart', // WooCommerce.
		'woocommerce_checkout',
		'woocommerce_order_tracking',
		'woocommerce_my_account',
		'woocommerce_edit_account',
		'woocommerce_change_password',
		'woocommerce_view_order',
		'woocommerce_logout',
		'woocommerce_pay',
		'woocommerce_thankyou',
		'woocommerce_lost_password',
		'woocommerce_edit_address',
		'tc_process_payment',
		'maxmegamenu', // Max Mega Menu.
		'searchandfilter', // Search and Filter.
		'downloads', // Easy Digital Downloads.
		'download_history',
		'purchase_history',
		'download_checkout',
		'purchase_link',
		'download_cart',
		'edd_profile_editor',
		'edd_login',
		'edd_register',
		'swpm_protected', // Simple Membership Partially Protected content.
		'gravityform', // Gravity Forms.
		'sdm_latest_downloads', // SDM Simple Download Monitor.
		'slimstat', // Slimstat Analytics.
		'ninja_tables', // Ninja Tables.
	);

	// Trim the names from the option right away, so that array_unique() can
	// recognize ' foo' and 'foo' as the same shortcode.
	$disable_shortcodes = get_option( 'relevanssi_disable_shortcodes' );
	$shortcodes         = array_map( 'trim', explode( ',', (string) $disable_shortcodes ) );

	/**
	 * Filters the shortcodes Relevanssi disables while indexing posts.
	 *
	 * @param array An array of shortcodes disabled.
	 *
	 * @return array An array of shortcodes disabled.
	 */
	$shortcodes = apply_filters(
		'relevanssi_disabled_shortcodes',
		array_unique( array_merge( $shortcodes, $default_disables ) )
	);

	foreach ( $shortcodes as $shortcode ) {
		if ( empty( $shortcode ) ) {
			continue;
		}

		// The filter may have added names with extra whitespace. Trim before
		// the check, because a whitespace-only name is an empty name.
		$shortcode = trim( $shortcode );
		if ( '' === $shortcode ) {
			continue;
		}

		remove_shortcode( $shortcode );
		add_shortcode( $shortcode, '__return_empty_string' );
	}
}
/**
 * Converts INSERT query data array to query values.
 *
 * Builds one prepared "(...)" VALUES clause for each term in the collected
 * data using $wpdb->prepare(). Provides filters to modify the insert query
 * values before and after the conversion.
 *
 * @global $wpdb                 The WordPress database interface.
 * @global $relevanssi_variables Used for the Relevanssi db table name.
 *
 * @param array  $insert_data An array of term => data pairs, where data has
 * token counts for the term in different contexts.
 * @param object $post        The indexed post object.
 *
 * @return array An array of values clauses for an INSERT query.
 */
function relevanssi_convert_data_to_values( $insert_data, $post ) {
	global $wpdb, $relevanssi_variables;

	$charset = $wpdb->get_col_charset(
		$relevanssi_variables['relevanssi_table'],
		'term'
	);
	// Emoji in the terms are encoded when the 'term' column charset is 'utf8'.
	$encode_emoji = 'utf8' === $charset;

	/**
	 * Sets the indexed post 'type' column in the index.
	 *
	 * Default value is 'post', but other common values include 'attachment',
	 * 'user' and taxonomy name.
	 *
	 * @param string Type value.
	 * @param object The post object for the current post.
	 */
	$type = apply_filters( 'relevanssi_index_get_post_type', 'post', $post );

	/**
	 * Filters the indexing data before it is converted to INSERT queries.
	 *
	 * @param array  $insert_data All the tokens and their counts.
	 * @param object $post        The post object.
	 */
	$insert_data = apply_filters( 'relevanssi_indexing_data', $insert_data, $post );

	// The data keys that hold token counts (missing keys become 0) and the keys
	// that hold detail strings (missing keys become an empty string).
	$count_keys  = array( 'content', 'title', 'comment', 'tag', 'link', 'author', 'category', 'excerpt', 'taxonomy', 'customfield', 'mysqlcolumn' );
	$detail_keys = array( 'taxonomy_detail', 'customfield_detail' );

	$values = array();
	foreach ( $insert_data as $term => $data ) {
		$row = array();
		foreach ( $count_keys as $key ) {
			$row[ $key ] = isset( $data[ $key ] ) ? $data[ $key ] : 0;
		}
		foreach ( $detail_keys as $key ) {
			$row[ $key ] = isset( $data[ $key ] ) ? $data[ $key ] : '';
		}

		if ( $encode_emoji ) {
			$term = wp_encode_emoji( $term );
		}
		$term = trim( $term );

		// The term is passed twice: once as is and once for REVERSE().
		$values[] = $wpdb->prepare(
			'(%d, %s, REVERSE(%s), %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s, %s, %d)',
			$post->ID,
			$term,
			$term,
			$row['content'],
			$row['title'],
			$row['comment'],
			$row['tag'],
			$row['link'],
			$row['author'],
			$row['category'],
			$row['excerpt'],
			$row['taxonomy'],
			$row['customfield'],
			$type,
			$row['taxonomy_detail'],
			$row['customfield_detail'],
			$row['mysqlcolumn']
		);
	}

	/**
	 * Filters the INSERT query VALUES sections before they are inserted in the INSERT query.
	 *
	 * @param array  $values Value sets.
	 * @param object $post   The post object.
	 */
	return apply_filters( 'relevanssi_indexing_values', $values, $post );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment