Instantly share code, notes, and snippets.
Last active
August 29, 2015 14:11
-
Star
0
(0)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
Save keesiemeijer/41b6c8576a2f2ac684ce to your computer and use it in GitHub Desktop.
Proof of concept to get related words from WordPress post title and filename.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Random related posts with the plugin functions from the Related Posts by Taxonomy plugin.
// https://github.com/keesiemeijer/related-posts-by-taxonomy
//
// The related posts keep the order returned by the plugin, but consecutive
// posts that share the same score are shuffled among themselves before the
// list is cut down to $posts_per_page entries.
global $post;

// Check that the plugin function exists and that there is a current post to relate to.
if ( function_exists( 'km_rpbt_related_posts_by_taxonomy' ) && is_object( $post ) ) {

	$taxonomies     = array( 'wp-parser-related-words' );
	$posts_per_page = 20; // posts you want to display

	$args = array(
		'posts_per_page' => -1, // get all related posts
		'post_types'     => $post->post_type,
		//'exclude_terms' => array(), // exclude deprecated?
	);

	// Plugin function
	$related_posts = km_rpbt_related_posts_by_taxonomy( $post->ID, $taxonomies, $args );

	if ( $related_posts ) {

		// Randomize posts inside their own score.
		// NOTE(review): assumes the plugin returns posts already grouped by
		// score[0]; only adjacent posts with an identical score are shuffled together.
		$related_keys = array();
		$score_group  = array();
		$group_score  = null;

		foreach ( (array) $related_posts as $key => $related ) {
			$score = $related->score[0];

			// A different score closes the current group: shuffle it and append it.
			if ( $score_group && ( $group_score !== $score ) ) {
				shuffle( $score_group );
				$related_keys = array_merge( $related_keys, $score_group );
				$score_group  = array();
			}

			$score_group[] = $key;
			$group_score   = $score;
		}

		// Flush the last group.
		shuffle( $score_group );
		$related_keys = array_merge( $related_keys, $score_group );

		// Only keep the number of posts we want to display.
		$related_keys = array_slice( $related_keys, 0, $posts_per_page );

		// Display related posts. Titles and URLs are escaped for their HTML context.
		echo count( $related_posts ) . ' related posts found for ' . esc_html( $post->post_title );
		echo '<h2>Related Posts</h2>';
		echo '<ul>';

		// Loop through the related posts.
		foreach ( $related_keys as $key ) {
			$related = $related_posts[ $key ];

			echo '<li><a href="' . esc_url( get_permalink( $related->ID ) ) . '">' . esc_html( $related->post_title ) . '</a>';
			echo ' (' . $related->score[0] . ')</li>';
		}

		echo '</ul>';
	}
}
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Related words from title functions. | |
*/ | |
/**
 * Provides the words that are ignored when collecting related words from a title.
 *
 * @return array List of restricted words.
 */
function get_restricted_words() {
	$restricted_words = array(
		'a',
		'an',
		'are',
		'as',
		'at',
		'be',
		'by',
		'for',
		'from',
		'get',
		'how',
		'is',
		'in',
		'it',
		'of',
		'on',
		'or',
		'that',
		'the',
		'this',
		'to',
		'with',
		'wp',
	);

	return $restricted_words;
}
/**
 * Collects the words of a title together with their stems.
 *
 * The title is normalised with sanitize_title_with_underscores() and split on
 * underscores. The word 'wp' and every word found in $restricted are dropped.
 * The first remaining word (and its stem) is kept regardless when it is listed
 * in $allowed_first, and the words returned by get_wp_words() are always added.
 *
 * @param string $title         Title to get words from.
 * @param array  $restricted    Restricted words.
 * @param array  $allowed_first Restricted words that are still allowed as the first word.
 * @return array Unique, non-empty words and word stems from the title (keys are not reindexed).
 */
function get_related_words_from_title( $title, $restricted = array(), $allowed_first = array() ) {
	if ( empty( $title ) ) {
		return array();
	}

	// Lower case title with its words separated by single underscores.
	$slug = sanitize_title_with_underscores( $title );

	// Words (and stems) that directly follow a 'wp' word.
	$wp_related = get_wp_words( $slug );

	// All title words, without the 'wp' words themselves.
	$parts = unset_word( 'wp', explode( '_', $slug ) );

	// The first word and its stem, but only if it's an allowed first word.
	$leading = array();
	if ( isset( $parts[0] ) && in_array( $parts[0], $allowed_first ) ) {
		$leading = array( $parts[0], PorterStemmer::Stem( $parts[0] ) );
	}

	// Keep every word that is not restricted, and stem each word that is kept.
	$kept  = array();
	$stems = array();
	foreach ( $parts as $part ) {
		if ( in_array( $part, $restricted ) ) {
			continue;
		}

		$kept[]  = $part;
		$stems[] = PorterStemmer::Stem( $part );
	}

	// Merge all allowed words and stems: first word, wp words, words, stems.
	$merged = array_merge( $leading, $wp_related, $kept, $stems );

	return array_unique( array_filter( $merged ) );
}
/**
 * Returns the 'wp' words of a title.
 *
 * For every 'wp' word that is followed by another word three entries are
 * added: wp-{next word}, the next word itself and the stem of the next word.
 *
 * @param string $title Underscore separated title to get wp words from.
 * @return array Unique wp words (reindexed). Empty if there are none.
 */
function get_wp_words( $title ) {
	// A title that is just 'wp' is allowed as a word.
	if ( 'wp' === $title ) {
		return array( 'wp' );
	}

	if ( empty( $title ) ) {
		return array();
	}

	$parts   = explode( '_', $title );
	$last    = count( $parts ) - 1;
	$related = array();

	// Stop before the last word: a trailing 'wp' has no next word.
	for ( $i = 0; $i < $last; $i++ ) {
		if ( 'wp' === $parts[ $i ] ) {
			$next = $parts[ $i + 1 ];

			array_push( $related, 'wp-' . $next, $next, PorterStemmer::Stem( $next ) );
		}
	}

	return array_values( array_unique( $related ) );
}
/**
 * Removes every occurrence of a word from an array of words.
 *
 * Words are compared strictly (===) and the result is reindexed from 0.
 *
 * @param string $word  Word to be removed.
 * @param array  $words Words to remove the word from.
 * @return array Reindexed array without the removed word.
 */
function unset_word( $word, $words ) {
	// Keys of all entries that are identical to the word.
	$matching_keys = array_keys( $words, $word, true );

	$remaining = array_diff_key( $words, array_flip( $matching_keys ) );

	return array_values( $remaining );
}
/**
 * Sanitizes a title to lower case words separated by underscores.
 *
 * Converts the string to lower case, replaces every character that is not
 * a-z or 0-9 with an underscore, collapses consecutive underscores and trims
 * underscores from both ends.
 *
 * @param string $title Title to sanitize.
 * @return string Sanitized title.
 */
function sanitize_title_with_underscores( $title ) {
	// The patterns are applied in order on the lower cased title.
	$patterns = array(
		'/[^a-z0-9]/', // every non alphanumeric character becomes an underscore
		'/_+/',        // multiple underscores become a single underscore
	);

	$underscored = preg_replace( $patterns, '_', strtolower( $title ) );

	return trim( $underscored, '_' );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Proof of concept to get related words from WordPress post title and filename.
echo related_title_words_table();

/**
 * Returns an HTML table with the related words of all posts in the current post type archive.
 *
 * Every row shows the post title, the related words found for it and the
 * title words that were skipped. Related words are color coded:
 * green  - word found in the title (or a wp-{second word} word),
 * orange - word stem that is not in the title itself,
 * blue   - the sanitized source file name,
 * red    - title word that was skipped.
 *
 * A paragraph naming the title with the most (related) words precedes the table.
 *
 * NOTE(review): get_source_file() is not defined in this file; it is assumed to
 * return the source file path for a post ID (empty if there is none).
 *
 * @return string HTML output. Empty string if not on a post type archive.
 */
function related_title_words_table() {

	if ( ! is_post_type_archive() ) {
		return '';
	}

	$post_type        = get_query_var( 'post_type' );
	$restricted_words = get_restricted_words();

	// Allowed words if it's the first word.
	$allowed_first_words = array(
		'is',  // is_* functions
		'get', // get_* functions
	);

	$word_count  = 0;
	$count_title = ''; // title with the most (related) words.

	// Get all posts from the post type.
	// Array arguments also work when the post_type query var is an array.
	$post_type_posts = get_posts(
		array(
			'post_type'      => $post_type,
			'posts_per_page' => -1,
		)
	);

	// Table output.
	$table = '<table><thead><tr><th>Title</th><th>Related Words</th><th>Skipped Words</th></tr></thead><tbody>';

	foreach ( $post_type_posts as $post ) {

		$title = $post->post_title;

		if ( empty( $title ) ) {
			continue;
		}

		if ( 'wp-parser-method' === $post_type ) {
			// Get related words for class and method separately.
			$method_words = array();

			foreach ( explode( '::', $title ) as $method_title ) {
				$method_title_words = get_related_words_from_title( $method_title, $restricted_words, $allowed_first_words );
				$method_words       = array_merge( $method_words, $method_title_words );
			}

			$words = array_unique( array_filter( $method_words ) );
		} else {
			// Get all related words and their stems.
			$words = get_related_words_from_title( $title, $restricted_words, $allowed_first_words );
		}

		if ( empty( $words ) ) {
			continue;
		}

		// Get words from post title to check against.
		$title_words = explode( '_', sanitize_title_with_underscores( $title ) );

		$word_str = '';
		foreach ( $words as $word ) {
			// A word without a dash (not a wp-{second word} word) that is
			// not in the title itself is a word stem.
			$is_stem = ( false === strpos( $word, '-' ) ) && ! in_array( $word, $title_words, true );
			$color   = $is_stem ? 'orange' : 'green';

			$word_str .= ' <span style="color:' . $color . '; padding-right:1em;">' . $word . '</span>';
		}

		// Check for skipped words (title words that are not in the related words).
		$skipped_word_str = '';
		foreach ( $title_words as $title_word ) {
			if ( ! in_array( $title_word, $words, true ) ) {
				$skipped_word_str .= ' <span style="color:red; padding-right:1em;">' . $title_word . '</span>';
			}
		}

		// Add the filename to the words array.
		$file = get_source_file( $post->ID );
		if ( ! empty( $file ) ) {
			$file_parts = pathinfo( $file );
			$file_name  = sanitize_title( $file_parts['basename'] );
			$words[]    = $file_name;
			$word_str  .= ' <span style="color:CornflowerBlue; padding-right:1em;">' . $file_name . '</span>';
		}

		// Table row. The title is escaped as it is displayed inside HTML.
		$table .= '<tr><td>' . esc_html( $title ) . '</td>';
		$table .= '<td>' . $word_str . '</td><td>' . $skipped_word_str . '</td></tr>';

		// Keep track of the title with the most (related) words.
		if ( count( $words ) > $word_count ) {
			$word_count  = count( $words );
			$count_title = $title;
		}

		// insert words as terms.
		// wp_set_object_terms( $post->ID, $words, 'wp-parser-related-words', false );
	} // foreach post

	// End of table output.
	$table .= '</tbody></table>';

	$out = '<p>' . esc_html( $count_title ) . ' has the most (related) words. ' . $word_count . ' words.</p>';

	return $out . $table;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The output of related words can be seen here:
Functions
https://rawgit.com/keesiemeijer/b8ba0b01006d6d859919/raw/poc-related-words-functions.html
Classes
https://rawgit.com/keesiemeijer/0727a611ee3d171a5ea0/raw/poc-related-words-classes.html
Methods
https://rawgit.com/keesiemeijer/49d3d62068be351e7adb/raw/poc-related-words-methods.html
Hooks
https://rawgit.com/keesiemeijer/7403f273deeb546389d8/raw/poc-related-words-hooks.html