dmsnell/truncate-html.php Secret

## truncate-html.php
<?php

/**
 * Returns a portion of an HTML document including up to a given number of code points.
 *
 * The limit applies to the decoded text content: a character reference such as
 * `&amp;` counts as the single code point it represents. The returned value is
 * always HTML, so text that was escaped in the input stays escaped in the output.
 *
 * Example:
 *
 *     'Just <a href="#">a link</a>' = truncate_html( 'Just <a href="#">a link</a> to content.', 11 );
 *     'Just <a href="#">a l</a>'    = truncate_html( 'Just <a href="#">a link</a> to content.', 8 );
 *     'Just'                        = truncate_html( 'Just <a href="#">a link</a> to content.', 4 );
 *     'AT&amp;T'                    = truncate_html( 'AT&amp;T and friends', 4 );
 *
 * NOTE(review): the tag-bearing examples depend on how `serializeUntil()` treats the
 * bookmarked text node, which is not visible here. Confirm it trims mid-node as shown.
 *
 * @param string $document        Input HTML document to truncate.
 * @param int    $codepoint_count Max code points in textContent of output.
 * @return string Portion of HTML document containing up to max code points, including original HTML formatting.
 *                An empty string is returned when `$codepoint_count` is not a positive integer.
 */
function truncate_html( $document, $codepoint_count ) {
	// Invalid calls. Should these raise an error somewhere?
	if ( ! is_int( $codepoint_count ) || $codepoint_count < 1 ) {
		return '';
	}

	// If the document contains fewer bytes than are allowed as code points
	// then it's safe to return the entire thing and avoid additional
	// processing because byte counts are always >= code point counts.
	if ( strlen( $document ) < $codepoint_count ) {
		return $document;
	}

	// Without a '<' the input cannot contain any tags, so it is a single run
	// of text whose only HTML syntax is character references.
	if ( ! str_contains( $document, '<' ) ) {
		$decoded_text = html_entity_decode( $document, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );

		// Nothing to cut: hand back the original markup untouched rather
		// than its decoded form.
		if ( mb_strlen( $decoded_text, 'UTF-8' ) <= $codepoint_count ) {
			return $document;
		}

		// Count and cut on decoded code points, then escape again so the
		// result is still HTML. Returning the decoded text directly would
		// turn `&lt;b&gt;` into a live `<b>` tag. Quotes are left alone
		// because they need no escaping inside text content.
		return htmlspecialchars(
			mb_substr( $decoded_text, 0, $codepoint_count, 'UTF-8' ),
			ENT_NOQUOTES | ENT_SUBSTITUTE | ENT_HTML5,
			'UTF-8'
		);
	}

	// Walk every token, accumulating the decoded length of each text node.
	$text_length = 0;
	$processor   = new WP_HTML_Processor( $document );
	while ( $processor->next_token() ) {
		if ( '#text' === $processor->get_node_name() ) {
			$text_chunk   = $processor->get_node_text();
			$chunk_length = mb_strlen( html_entity_decode( $text_chunk, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' ), 'UTF-8' );

			// This text node reaches (or passes) the limit: bookmark it
			// and let the processor serialize the document up to there.
			if ( $text_length + $chunk_length >= $codepoint_count ) {
				$processor->set_bookmark( 'here' );
				return $processor->serializeUntil( 'here' );
			}

			$text_length += $chunk_length;
		}
	}

	// By now the document fits within the requested limit,
	// otherwise it would have returned already.
	return $document;
}
	<?php

	/**
	 * Returns a portion of an HTML document including up to a given number of code points.
	 *
	 * The limit applies to the decoded text content: a character reference such as
	 * `&amp;` counts as the single code point it represents. The returned value is
	 * always HTML, so text that was escaped in the input stays escaped in the output.
	 *
	 * Example:
	 *
	 *     'Just <a href="#">a link</a>' = truncate_html( 'Just <a href="#">a link</a> to content.', 11 );
	 *     'Just <a href="#">a l</a>'    = truncate_html( 'Just <a href="#">a link</a> to content.', 8 );
	 *     'Just'                        = truncate_html( 'Just <a href="#">a link</a> to content.', 4 );
	 *     'AT&amp;T'                    = truncate_html( 'AT&amp;T and friends', 4 );
	 *
	 * NOTE(review): the tag-bearing examples depend on how `serializeUntil()` treats the
	 * bookmarked text node, which is not visible here. Confirm it trims mid-node as shown.
	 *
	 * @param string $document        Input HTML document to truncate.
	 * @param int    $codepoint_count Max code points in textContent of output.
	 * @return string Portion of HTML document containing up to max code points, including original HTML formatting.
	 *                An empty string is returned when `$codepoint_count` is not a positive integer.
	 */
	function truncate_html( $document, $codepoint_count ) {
		// Invalid calls. Should these raise an error somewhere?
		if ( ! is_int( $codepoint_count ) || $codepoint_count < 1 ) {
			return '';
		}

		// If the document contains fewer bytes than are allowed as code points
		// then it's safe to return the entire thing and avoid additional
		// processing because byte counts are always >= code point counts.
		if ( strlen( $document ) < $codepoint_count ) {
			return $document;
		}

		// Without a '<' the input cannot contain any tags, so it is a single run
		// of text whose only HTML syntax is character references.
		if ( ! str_contains( $document, '<' ) ) {
			$decoded_text = html_entity_decode( $document, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );

			// Nothing to cut: hand back the original markup untouched rather
			// than its decoded form.
			if ( mb_strlen( $decoded_text, 'UTF-8' ) <= $codepoint_count ) {
				return $document;
			}

			// Count and cut on decoded code points, then escape again so the
			// result is still HTML. Returning the decoded text directly would
			// turn `&lt;b&gt;` into a live `<b>` tag. Quotes are left alone
			// because they need no escaping inside text content.
			return htmlspecialchars(
				mb_substr( $decoded_text, 0, $codepoint_count, 'UTF-8' ),
				ENT_NOQUOTES | ENT_SUBSTITUTE | ENT_HTML5,
				'UTF-8'
			);
		}

		// Walk every token, accumulating the decoded length of each text node.
		$text_length = 0;
		$processor   = new WP_HTML_Processor( $document );
		while ( $processor->next_token() ) {
			if ( '#text' === $processor->get_node_name() ) {
				$text_chunk   = $processor->get_node_text();
				$chunk_length = mb_strlen( html_entity_decode( $text_chunk, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' ), 'UTF-8' );

				// This text node reaches (or passes) the limit: bookmark it
				// and let the processor serialize the document up to there.
				if ( $text_length + $chunk_length >= $codepoint_count ) {
					$processor->set_bookmark( 'here' );
					return $processor->serializeUntil( 'here' );
				}

				$text_length += $chunk_length;
			}
		}

		// By now the document fits within the requested limit,
		// otherwise it would have returned already.
		return $document;
	}