Skip to content

Instantly share code, notes, and snippets.

@robertuniqid
Last active July 10, 2016 23:58
Show Gist options
  • Save robertuniqid/d0dc917fccabb3dcd87351e8bc93d2f7 to your computer and use it in GitHub Desktop.
Save robertuniqid/d0dc917fccabb3dcd87351e8bc93d2f7 to your computer and use it in GitHub Desktop.
WPEP - Extract Image from different paths within WordPress from JSON data. Can be adapted.
<?php
namespace WPEPAddOnImportExport\Coordinator;
/**
 * Collects the image assets referenced by decoded WPEP export data.
 *
 * The data is walked according to a "path map" (see _get_extract_from_array_args()).
 * Every result is a map of key => asset URL, where the key is the attachment ID
 * when one can be resolved and the md5 hash of the URL otherwise.
 */
class AssetMatcher {

    /**
     * Base URL of the site; only URLs starting with it are treated as local assets.
     *
     * @var string
     */
    public $website_url;

    public function __construct() {
        $this->website_url = get_site_url();
    }

    /**
     * Decodes a JSON document and extracts the assets it references.
     *
     * @param string $json
     * @return array key => asset URL
     */
    public function extract_from_json( $json ) {
        $decoded = json_decode( $json, true );

        // Invalid JSON decodes to null and a scalar document has nothing to walk.
        if( !is_array( $decoded ) )
            $decoded = array();

        return $this->extract_from_array( $decoded );
    }

    /**
     * Extracts the assets referenced by already decoded content.
     *
     * The result passes through the 'wpep_addon_ie_asset_matcher_result' filter.
     *
     * @param array $content_information
     * @return array key => asset URL, without duplicate URLs
     */
    public function extract_from_array( $content_information ) {
        // Resolve the map once so the args filter runs a single time per extraction.
        $args     = $this->_get_extract_from_array_args();
        $response = $this->find_assets_from_entry( $content_information, $args );

        return apply_filters( 'wpep_addon_ie_asset_matcher_result', array_unique( $response ), $content_information, $args );
    }

    /**
     * Builds the path map describing where assets live inside the content.
     *
     * String keys name a level to descend into; numerically indexed strings name
     * the leaf keys whose values are inspected for assets. The map passes through
     * the 'wpep_addon_ie_asset_matcher_args' filter.
     *
     * @return array
     */
    private function _get_extract_from_array_args() {
        $args = array(
            'courses' => array(
                'meta' => array(
                    'wpep_course_description',
                    '_cic-thumb-image',
                    '_cic-icon-image'
                ),
                'section_list' => array(
                    'content',
                    'lessons' => array( 'content' )
                )
            ),
            'taxonomies' => array(
                'course_category' => array(
                    'meta' => array(
                        'icon' => array(
                            'url'
                        )
                    )
                )
            ),
            'videos' => array(
                'post_information' => array( 'post_content' )
            ),
            'ebooks' => array(
                'post_information' => array( 'post_content' ),
                'meta'             => array( '_thumbnail_id' )
            )
        );

        return apply_filters( 'wpep_addon_ie_asset_matcher_args', $args );
    }

    /**
     * Applies the same path map to every entry of a list and merges the results.
     *
     * @param array|\Traversable $entries
     * @param array $path_map
     * @return array key => asset URL
     */
    public function find_assets_from_entries( $entries, $path_map ) {
        $response = array();

        if( !is_array( $entries ) && !( $entries instanceof \Traversable ) )
            return $response;

        // "+=" keeps the first URL found for a given key.
        foreach( $entries as $entry )
            $response += $this->find_assets_from_entry( $entry, $path_map );

        return $response;
    }

    /**
     * Walks a single entry along the path map and collects the assets found.
     *
     * @param array $entry
     * @param array $path_map
     * @return array key => asset URL
     */
    public function find_assets_from_entry( $entry, $path_map ) {
        $response = array();

        // Nothing can be navigated without an indexable entry and an iterable map.
        if( !is_array( $path_map ) || ( !is_array( $entry ) && !( $entry instanceof \ArrayAccess ) ) )
            return $response;

        foreach( $path_map as $path_index => $path ) {
            // A numerically indexed scalar names a leaf key: extract from its value.
            // A missing leaf is simply skipped instead of being mistaken for a level.
            if( is_numeric( $path_index ) && is_scalar( $path ) ) {
                if( isset( $entry[ $path ] ) )
                    $response += $this->_find_assets_from_path_value( $entry[ $path ] );

                continue;
            }

            // The entry has nothing at this level.
            if( !isset( $entry[ $path_index ] ) )
                continue;

            $value = $entry[ $path_index ];

            // A numerically keyed array is a set of sibling entries sharing one sub-map.
            if( is_array( $value ) && is_numeric( key( $value ) ) ) {
                $response += $this->find_assets_from_entries( $value, $path );
                continue;
            }

            // Otherwise move one level deeper into the entry.
            if( is_array( $path ) )
                $response += $this->find_assets_from_entry( $value, $path );
        }

        return $response;
    }

    /**
     * Extracts assets from a leaf value, which may be a single value or a
     * (possibly nested) list of values.
     *
     * @param mixed $path_value
     * @return array key => asset URL
     */
    private function _find_assets_from_path_value( $path_value ) {
        if( is_array( $path_value ) ) {
            $response = array();

            // Recurse so nested lists are walked rather than handed to trim().
            foreach( $path_value as $value )
                $response += $this->_find_assets_from_path_value( $value );

            return $response;
        }

        // Only strings and numbers can hold a URL, an attachment ID or markup.
        if( !is_string( $path_value ) && !is_int( $path_value ) && !is_float( $path_value ) )
            return array();

        return $this->_find_assets_from_string( (string) $path_value );
    }

    /**
     * Extracts assets from one value, interpreted in this order: a local URL,
     * a numeric attachment ID, or markup whose <img> tags point to local URLs.
     *
     * @param string $content
     * @return array key => asset URL
     */
    private function _find_assets_from_string( $content ) {
        $content = trim( (string) $content );

        if( $content === '' )
            return array();

        if( $this->_is_local_url( $content ) ) {
            if( $this->_is_valid_resource( $content ) )
                return array( $this->_attachment_url_to_id( $content ) => $content );

            return array();
        }

        if( is_numeric( $content ) ) {
            $attachment_id  = intval( $content );
            $attachment_url = wp_get_attachment_url( $attachment_id );

            if( $attachment_url )
                return array( $attachment_id => $attachment_url );

            return array();
        }

        // Building a DOM is costly; plain text without an image tag cannot match //img.
        if( stripos( $content, '<img' ) === false )
            return array();

        $response = array();

        foreach( $this->_extract_from_dom( $content, '//img', 'src' ) as $target )
            if( $this->_is_local_url( $target ) && $this->_is_valid_resource( $target ) )
                $response[ $this->_attachment_url_to_id( $target ) ] = $target;

        return $response;
    }

    /**
     * Tells whether a URL starts with the site URL.
     *
     * @param string $url
     * @return bool
     */
    private function _is_local_url( $url ) {
        $base = (string) $this->website_url;

        // An empty base would match every string on PHP 8 and warn on older versions.
        return $base !== '' && strpos( $url, $base ) === 0;
    }

    /**
     * Parses an HTML fragment and returns the nodes matching an XPath expression,
     * or the string value of one of their attributes.
     *
     * @param string $content HTML fragment
     * @param string $element_path XPath expression
     * @param bool|string $element_value attribute to read, false to get the SimpleXMLElement nodes
     * @return array
     */
    private function _extract_from_dom( $content, $element_path = '//img', $element_value = false ) {
        // Authored markup is rarely valid; buffer libxml errors instead of raising warnings.
        $previous_setting = libxml_use_internal_errors( true );

        $doc = new \DOMDocument();
        // The charset meta stops libxml from decoding UTF-8 content as ISO-8859-1,
        // which would corrupt non-ASCII characters inside attribute values.
        $loaded = $doc->loadHTML(
            '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
            . $content
            . '</body></html>'
        );

        libxml_clear_errors();
        libxml_use_internal_errors( $previous_setting );

        if( $loaded === false )
            return array();

        $xml = simplexml_import_dom( $doc );

        if( !( $xml instanceof \SimpleXMLElement ) )
            return array();

        $entries = $xml->xpath( $element_path );

        if( empty( $entries ) )
            return array();

        $response = array();

        foreach( $entries as $entry ) {
            if( !$element_value ) {
                $response[] = $entry;
                continue;
            }

            if( isset( $entry[ $element_value ] ) )
                $response[] = (string) $entry[ $element_value ]; // Stringify
        }

        return $response;
    }

    /**
     * Requests the URL's headers and tells whether it is served as an image.
     *
     * @param string $url
     * @return bool
     */
    private function _is_valid_resource( $url ) {
        $url_headers = get_headers( $url, 1 );

        if( !is_array( $url_headers ) )
            return false;

        // Header names are case-insensitive and servers spell them differently.
        $url_headers = array_change_key_case( $url_headers, CASE_LOWER );

        if( !isset( $url_headers['content-type'] ) )
            return false;

        $type = $url_headers['content-type'];

        // After redirects get_headers() returns one value per response; the last one
        // belongs to the resource that was finally served.
        if( is_array( $type ) )
            $type = end( $type );

        // Drop parameters such as "; charset=utf-8" before comparing.
        $type_parts = explode( ';', (string) $type );
        $type       = strtolower( trim( $type_parts[0] ) );

        return in_array( $type, array(
            'image/png',
            'image/jpg',
            'image/jpeg',
            'image/jpe',
            'image/gif',
            'image/tif',
            'image/tiff',
            'image/svg',
            'image/svg+xml',
            'image/ico',
            'image/icon',
            'image/x-icon',
            'image/vnd.microsoft.icon'
        ), true );
    }

    /**
     * Resolves the post ID whose guid equals the URL.
     *
     * @param string $image_url
     * @return int|string the post ID, or the md5 hash of the URL when no post matches
     */
    private function _attachment_url_to_id( $image_url ) {
        global $wpdb;

        // wpdb::prepare() quotes %s itself, so the placeholder stays unquoted.
        $attachment = $wpdb->get_col( $wpdb->prepare( "SELECT ID FROM $wpdb->posts WHERE guid = %s", $image_url ) );

        return isset( $attachment[0] ) ? $attachment[0] : md5( $image_url );
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment