Skip to content

Instantly share code, notes, and snippets.

@wpgaurav
Last active February 4, 2024 16:21
Show Gist options
  • Save wpgaurav/db40c0aa732fe583475fc6d8cf9cf509 to your computer and use it in GitHub Desktop.
Save wpgaurav/db40c0aa732fe583475fc6d8cf9cf509 to your computer and use it in GitHub Desktop.
Summarize WordPress post content using PHP
<?php
/**
 * Build a short extractive summary of a piece of post content.
 *
 * The content is normalised (curly apostrophes straightened, "; " treated as a
 * sentence boundary), passed through remove_h2_to_h6_and_no_period(), stripped
 * of tags and capped at 700 words. It is then split into sentence fragments on
 * ".", "!", "?" and ":". Each fragment is scored by counting, for every one of
 * its words, how many OTHER fragments contain that word (case-insensitive
 * substring match). The highest scoring fragments are returned in the order in
 * which they appear in the content.
 *
 * @param string $content       Post content; may contain HTML.
 * @param int    $max_sentences Maximum number of sentences to return. Defaults to 5.
 * @return string[] Trimmed sentence fragments without their terminating
 *                  punctuation, in document order. Empty when nothing usable
 *                  was found.
 */
function gtcontent_summarize($content, $max_sentences = 5) {
    $content       = (string) $content;
    $max_sentences = max(0, (int) $max_sentences);

    $content = str_replace("’", "'", $content);

    // Treat "; " as a sentence boundary, but leave the semicolon that closes an
    // HTML entity (e.g. "Tom &amp; Jerry") alone so the entity is not cut in half.
    $normalized = preg_replace_callback(
        '/&(?:#x?[0-9a-f]+|[a-z][a-z0-9]*);|; /i',
        function ($match) {
            return $match[0] === '; ' ? '.' : $match[0];
        },
        $content
    );
    // preg_replace_callback() returns null on a PCRE error; fall back to the plain replacement.
    $content = is_string($normalized) ? $normalized : str_replace("; ", ".", $content);

    $content = remove_h2_to_h6_and_no_period($content);
    $content = wp_strip_all_tags($content, true);
    // An empty "more" string keeps wp_trim_words() from gluing "&hellip;" onto the last sentence.
    $content = wp_trim_words($content, 700, '');

    // Split on sentence-ending punctuation, then discard blank fragments.
    $fragments = preg_split('/[.!?:]/', $content, -1, PREG_SPLIT_NO_EMPTY);
    if (!is_array($fragments)) {
        return array();
    }
    $sentences = array();
    foreach ($fragments as $fragment) {
        $fragment = trim($fragment);
        if ($fragment !== '') {
            $sentences[] = $fragment;
        }
    }

    // Score every sentence by how often its words occur in the other sentences.
    $scores = array();
    foreach ($sentences as $index => $sentence) {
        $scores[$index] = 0;
        // PREG_SPLIT_NO_EMPTY matters: an empty needle makes stripos() match
        // everything on PHP 8 and raise a warning on older versions.
        $words = preg_split('/\s+/', $sentence, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($words)) {
            continue;
        }
        foreach ($words as $word) {
            foreach ($sentences as $other_index => $other_sentence) {
                if ($other_index !== $index && stripos($other_sentence, $word) !== false) {
                    $scores[$index]++;
                }
            }
        }
    }

    // Highest scores first, keys preserved so they still point into $sentences.
    arsort($scores);
    $keys = array_slice(array_keys($scores), 0, $max_sentences);

    // Restore document order. Selecting by key (rather than by value) keeps the
    // result within the limit even when the content repeats a sentence.
    sort($keys);
    $top_sentences = array();
    foreach ($keys as $key) {
        $top_sentences[] = $sentences[$key];
    }

    return $top_sentences;
}
/**
 * Remove h2–h6 heading elements and make every remaining line end in punctuation.
 *
 * Heading elements (opening tag through the matching closing tag, possibly
 * spanning several lines) are deleted. The remaining text is then processed
 * line by line: each line is trimmed, blank lines are discarded, and a period
 * is appended to any line that does not already end in ".", "?", "!" or ":".
 *
 * @param string $string Text or HTML with "\n" line breaks.
 * @return string The non-empty lines joined by single spaces.
 */
function remove_h2_to_h6_and_no_period($string) {
    $string = (string) $string;

    // The back-reference \1 makes the closing tag match the opening level.
    $without_headings = preg_replace('/<h([2-6])\b[^>]*>.*?<\/h\1\s*>/is', '', $string);
    // preg_replace() returns null on a PCRE error; keep the input untouched in that case.
    if (is_string($without_headings)) {
        $string = $without_headings;
    }

    $lines = array();
    foreach (explode("\n", $string) as $line) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }
        // Terminate the line so it is not merged with the next one once the
        // line breaks are gone.
        if (!preg_match('/[.?!:]$/', $line)) {
            $line .= '.';
        }
        $lines[] = $line;
    }

    return implode(' ', $lines);
}
/**
 * Echo a collapsible "Quick Summary" box for the current post.
 *
 * Reads the global $post, and only acts when it is set and its post type is
 * 'post'. The post content is summarised with gtcontent_summarize() and each
 * returned sentence is printed as a list item followed by a period. Nothing is
 * printed when the summary is empty.
 *
 * @return void
 */
function gtcontent_display_after_headline() {
    global $post;

    if (!$post || get_post_type($post) !== 'post') {
        return;
    }

    $content = get_the_content(null, false, $post);
    $summary = gtcontent_summarize($content);

    // Avoid rendering an empty box when no sentence could be extracted.
    if (empty($summary)) {
        return;
    }

    $output = '<aside class="hide-on-print"><details style="border: 1px solid #ddd; border-radius: 4px; padding: 10px; background-color: #FFFEF1; margin-bottom: 20px"><summary style="color: #333; font-weight: bold; cursor: pointer; padding: 5px;">Quick Summary</summary><div style="padding: 10px; color: #555; line-height: 1.6;"><ul class="list styled">';
    foreach ($summary as $sentence) {
        // The sentences are plain text, so escape them for the HTML context.
        $output .= '<li>' . esc_html($sentence) . '.</li>';
    }
    $output .= '</ul></div></details></aside>';

    echo $output;
}
// Register gtcontent_display_after_headline() as a callback on the 'md_hook_content_item_text_top' action,
// so the summary box is echoed wherever that action fires (presumably a theme-specific hook — confirm for your theme).
add_action('md_hook_content_item_text_top', 'gtcontent_display_after_headline'); // Replace md_hook_content_item_text_top with your hook
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment