Last active
May 31, 2019 12:42
-
-
Save nczz/81c3a7a699e4d598cea9a85168bb959b to your computer and use it in GitHub Desktop.
WordPress Headless Replace Images From External Source
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
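/**
 * Blogger posts are imported with WordPress's built-in importer, but its image
 * handling is very poor: by default it only fetches each post's cover image.
 * Run this script when the remaining images inside the posts must be fetched too.
 */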
// Bootstrap WordPress. require_once (rather than include) stops the script
// right here with a clear error when wp-load.php cannot be found, instead of
// emitting a warning and failing later on the first WordPress function call.
require_once 'wp-load.php';
// Remove the execution time limit and raise the memory ceiling for this run.
set_time_limit(0);
ini_set('memory_limit', '256M');
/**
 * Installs the two image filters used during the import:
 *  - 'intermediate_image_sizes' is answered with an empty array;
 *  - 'wp_get_attachment_image_src' always resolves to the 'full' size,
 *    falling back to the MIME-type icon when $icon is requested.
 *
 * The closure is run immediately when 'after_setup_theme' has already fired.
 * That is the case when this code executes after wp-load.php has been loaded,
 * where hooking the action would mean the callback never runs.
 */
$mxp_register_full_size_filters = function () {
    add_filter('intermediate_image_sizes', '__return_empty_array');
    add_filter('wp_get_attachment_image_src', function ($image, $attachment_id, $size, $icon) {
        // Ignore the requested $size: always look up the full-size image.
        $image = image_downsize($attachment_id, 'full');
        if ($image || !$icon) {
            return $image;
        }
        // No image available: try the MIME-type icon instead.
        $src = wp_mime_type_icon($attachment_id);
        if (!$src) {
            return $image;
        }
        /** This filter is documented in wp-includes/post.php */
        $icon_dir = apply_filters('icon_dir', ABSPATH . WPINC . '/images/media');
        $src_file = $icon_dir . '/' . wp_basename($src);
        // Only measure the icon when the file is really there, so no warning
        // has to be suppressed and no undefined width/height is ever read.
        $dimensions = is_file($src_file) ? getimagesize($src_file) : false;
        if (is_array($dimensions) && $dimensions[0] && $dimensions[1]) {
            return array($src, $dimensions[0], $dimensions[1]);
        }
        return $image;
    }, 999, 4);
};
if (did_action('after_setup_theme')) {
    // The hook has already fired; adding an action now would be a no-op.
    $mxp_register_full_size_filters();
} else {
    add_action('after_setup_theme', $mxp_register_full_size_filters);
}
/**
 * Appends one "<label> => <message>" line to replace.log, which lives in the
 * same directory as this script.
 *
 * @param string $file Label written before the arrow (callers pass an event name).
 * @param string $data Message written after the arrow.
 */
function logger($file, $data) {
    $log_path = __DIR__ . "/replace.log";
    $line = "{$file} => {$data}" . PHP_EOL;
    // Message type 3 tells error_log() to append to the given file.
    error_log($line, 3, $log_path);
}
/**
 * Downloads $url into the file $dest using cURL.
 *
 * An existing, non-empty file at $dest is treated as an earlier successful
 * download and returned untouched. When the transfer fails, the error is
 * logged and the empty/partial file is removed so that it is not mistaken for
 * a finished download afterwards.
 *
 * @param string $url  Remote address to fetch.
 * @param string $dest Local path to write to.
 * @return string Always $dest; callers should verify the file exists and is
 *                usable, because a failed download leaves no file behind.
 */
function download_source($url, $dest) {
    $file_name = $dest;
    // A zero-byte file is a leftover of a failed attempt, not a cached result.
    if (file_exists($file_name) && filesize($file_name) > 0) {
        return $file_name;
    }
    $fp = fopen($file_name, 'w');
    if ($fp === false) {
        logger('request_download_source_error', "LINK: {$url} | ERRMSG: cannot open {$file_name} for writing");
        return $file_name;
    }
    $ch = curl_init($url);
    if ($ch === false) {
        fclose($fp);
        unlink($file_name);
        logger('request_download_source_error', "LINK: {$url} | ERRMSG: curl_init failed");
        return $file_name;
    }
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36');
    // The image URL itself is sent as the referer.
    curl_setopt($ch, CURLOPT_REFERER, $url);
    // Treat HTTP status >= 400 as a failure instead of saving the error page.
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // Stream the response body straight into the destination file.
    curl_setopt($ch, CURLOPT_FILE, $fp);
    $succeeded = curl_exec($ch);
    $error_message = curl_error($ch);
    curl_close($ch);
    fclose($fp);
    if ($succeeded === false || $error_message !== '') {
        logger('request_download_source_error', "LINK: {$url} | ERRMSG: $error_message");
        // Drop the empty/partial file so it is never reused as a valid image.
        if (file_exists($file_name)) {
            unlink($file_name);
        }
    }
    return $file_name;
}
/**
 * Localises the external images of one post and tidies its markup.
 *
 * Steps:
 *  1. Every <img> whose src does not contain site_url() is downloaded into the
 *     uploads base directory under an md5-based name, given an extension that
 *     matches its detected type, and its src is rewritten to the uploads URL.
 *     When the download fails or the file is not a readable image, the
 *     original tag is kept unchanged.
 *  2. All image URLs found in the resulting content are handed to
 *     mxp_import_image() together with $post_id.
 *  3. Attributes are stripped from <p> tags and all <div> wrappers are removed.
 *
 * @param int    $post_id ID of the post the content belongs to.
 * @param string $content Post HTML.
 * @return string The rewritten HTML.
 */
function parsing_image($post_id, $content) {
    $img_pattern = '/<img\s+.*?src=[\"\'](http[s]{0,1}\:\/\/?[^\"\' >]*)[\"\']?[^>]*>/i';
    $replaced = preg_replace_callback($img_pattern,
        function ($res) {
            $original_tag = $res[0];
            $url = $res[1];
            // Images already served from this site need no download.
            if (strpos($url, site_url()) !== false) {
                return $original_tag;
            }
            $upload_dir = wp_upload_dir();
            // Hash everything after the scheme. Limiting explode() to two
            // parts keeps later colons (e.g. a port) inside the hashed string,
            // so such URLs no longer collapse onto the same file name; URLs
            // without a second colon hash exactly as before.
            $normalized = str_replace(array('/', '.', '?', '&', '%', ';', '='), '-', $url);
            $parts = explode(':', $normalized, 2);
            $tmp_name = md5($parts[1]);
            $file_path = "{$upload_dir['basedir']}/{$tmp_name}";
            download_source($url, $file_path);
            $info = false;
            if (is_file($file_path) && filesize($file_path) > 0) {
                $info = getimagesize($file_path);
            }
            if (!is_array($info) || empty($info['mime'])) {
                // Nothing usable was downloaded: keep pointing at the source.
                logger('parsing_image_error', "LINK: {$url} | ERRMSG: not a readable image, original tag kept");
                if (is_file($file_path)) {
                    unlink($file_path);
                }
                return $original_tag;
            }
            $type = explode('/', $info['mime']);
            $type = end($type);
            // Any type other than gif/png is stored as .jpg, as before.
            $extensions = array('gif' => '.gif', 'jpeg' => '.jpg', 'png' => '.png');
            $extension = isset($extensions[$type]) ? $extensions[$type] : '.jpg';
            if (!rename($file_path, $file_path . $extension)) {
                logger('parsing_image_error', "LINK: {$url} | ERRMSG: cannot rename {$file_path}");
                return $original_tag;
            }
            $tmp_name .= $extension;
            $new_url = "{$upload_dir['baseurl']}/{$tmp_name}";
            $doc = new DOMDocument();
            // Collect parser warnings about the fragment instead of emitting them.
            $previous = libxml_use_internal_errors(true);
            // The encoding hint makes loadHTML() read the tag as UTF-8, so
            // non-ASCII alt/title text survives the round trip.
            $loaded = $doc->loadHTML('<?xml encoding="UTF-8">' . $original_tag);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
            $tags = $doc->getElementsByTagName('img');
            if (!$loaded || $tags->length === 0) {
                // Never return null here (that would delete the tag); fall
                // back to a plain textual swap of the URL.
                return str_replace($url, $new_url, $original_tag);
            }
            $tag = $tags->item(0);
            $tag->setAttribute('src', $new_url);
            return $doc->saveXML($tag);
        },
        $content);
    // preg_* functions return null on failure; never let that wipe the content.
    if ($replaced !== null) {
        $content = $replaced;
    }
    $matches = array(1 => array());
    preg_match_all($img_pattern, $content, $matches);
    $images = isset($matches[1]) ? $matches[1] : array();
    mxp_import_image($post_id, $images);
    // Match only real <p>/<div> tags; the former '/<p.*?>/' also rewrote tags
    // that merely start with "p", such as <pre>.
    $without_p_attributes = preg_replace('/<p(?:\s[^>]*)?>/', "<p>", $content);
    if ($without_p_attributes !== null) {
        $content = $without_p_attributes;
    }
    $without_div_attributes = preg_replace('/<div(?:\s[^>]*)?>/', "<div>", $content);
    if ($without_div_attributes !== null) {
        $content = $without_div_attributes;
    }
    $remove_divs = array("<div>", "</div>");
    $content = str_replace($remove_divs, "", $content);
    return $content;
}
/**
 * Attaches the given image URLs to a post as media-library attachments.
 *
 * For each distinct URL:
 *  - when an attachment whose guid contains the file name already exists, its
 *    post_parent is set to $post_id;
 *  - otherwise, when the corresponding file exists in the uploads directory,
 *    a new attachment is created for it and its metadata generated.
 * URLs with no file on disk are skipped and logged. The first newly created
 * image attachment is set as the post thumbnail.
 *
 * @param int      $post_id Post to attach the images to.
 * @param string[] $imgs    Image URLs found in the post content.
 * @return void
 */
function mxp_import_image($post_id, $imgs) {
    require_once ABSPATH . 'wp-admin/includes/media.php';
    require_once ABSPATH . 'wp-admin/includes/file.php';
    require_once ABSPATH . 'wp-admin/includes/image.php';
    global $wpdb;
    $filename = array();
    $upload_file = array();
    $upload_dir = wp_upload_dir();
    // The same image may appear several times in one post; handle it once so
    // that no duplicate attachments are created.
    $imgs = array_values(array_unique((array) $imgs));
    foreach ($imgs as $img_url) {
        $img_filename = str_replace($upload_dir['baseurl'] . "/", "", $img_url);
        $img_dir_path = str_replace($upload_dir['baseurl'], $upload_dir['basedir'], $img_url);
        $is_local_url = strpos($img_url, site_url()) !== false;
        $file_on_disk = file_exists($img_dir_path);
        if (!$file_on_disk && !$is_local_url) {
            // External image that was never downloaded: there is no file an
            // attachment could point to.
            logger('mxp_import_image_skip', "LINK: {$img_url} | ERRMSG: no local file, not attached");
            continue;
        }
        echo "已有 {$img_filename} 此媒體。位在: {$img_dir_path}" . PHP_EOL;
        // Look for an attachment that already represents this file.
        $existing_id = 0;
        $needle = pathinfo($img_filename, PATHINFO_FILENAME);
        if ($needle !== '') {
            $rows = $wpdb->get_results(
                $wpdb->prepare(
                    "SELECT ID FROM $wpdb->posts WHERE post_type = 'attachment' AND guid LIKE %s",
                    '%' . $wpdb->esc_like($needle) . '%'
                ),
                ARRAY_A
            );
            if (!empty($rows)) {
                $existing_id = (int) $rows[0]['ID'];
            }
        }
        $attached = false;
        if ($existing_id > 0) {
            $media_post = wp_update_post(array(
                'ID' => $existing_id,
                'post_parent' => $post_id,
            ), true);
            $attached = $media_post && !is_wp_error($media_post);
        }
        // No attachment could be re-parented: create one, but only for a file
        // that really exists.
        if (!$attached) {
            if ($file_on_disk) {
                $filename[] = $img_filename;
                $upload_file[] = $img_dir_path;
            } else {
                logger('mxp_import_image_skip', "LINK: {$img_url} | ERRMSG: file not found at {$img_dir_path}");
            }
        }
    }
    //logger('mxp_import_image', print_r($filename, true));
    // Create the queued attachments and attach them to the post.
    $set_feature_image = true;
    foreach ($upload_file as $i => $path) {
        if ($path == "" || !isset($filename[$i]) || $filename[$i] == "") {
            continue;
        }
        $wp_filetype = wp_check_filetype($filename[$i], null);
        $attachment = array(
            'post_mime_type' => $wp_filetype['type'],
            'post_parent' => $post_id,
            'post_title' => preg_replace('/\.[^.]+$/', '', $filename[$i]),
            'post_content' => '',
            'post_status' => 'inherit',
        );
        $attachment_id = wp_insert_attachment($attachment, $path, $post_id);
        // wp_insert_attachment() returns 0 on failure unless asked for a
        // WP_Error, so both outcomes must be checked.
        if (!$attachment_id || is_wp_error($attachment_id)) {
            logger('mxp_import_image_error', "FILE: {$path} | ERRMSG: wp_insert_attachment failed");
            continue;
        }
        // Generate the attachment metadata.
        $attachment_data = wp_generate_attachment_metadata($attachment_id, $path);
        wp_update_attachment_metadata($attachment_id, $attachment_data);
        // Use the first imported image as the post's featured image.
        $is_image = strpos((string) $wp_filetype['type'], 'image/') === 0;
        if ($set_feature_image && $is_image) {
            set_post_thumbnail($post_id, $attachment_id);
            $set_feature_image = false;
        }
        echo "匯入 {$filename[$i]} 媒體。位在: {$upload_file[$i]}" . PHP_EOL;
    }
}
// Main run: walk through every published post (newest first), localise its
// images and store the rewritten content together with a regenerated excerpt.
global $wpdb;
$querystr = "SELECT ID,post_content FROM $wpdb->posts WHERE $wpdb->posts.post_status = 'publish' AND $wpdb->posts.post_type = 'post' AND $wpdb->posts.post_date < NOW() ORDER BY $wpdb->posts.post_date DESC";
// Deliberately not named $posts/$post: at file scope those are WordPress
// globals, and get_post() falls back to the global $post for an empty ID.
$mxp_rows = $wpdb->get_results($querystr, ARRAY_A);
if (!is_array($mxp_rows)) {
    // get_results() yields null when the query fails.
    $mxp_rows = array();
}
foreach ($mxp_rows as $mxp_row) {
    $post_id = $mxp_row['ID'];
    // Import the images and get the rewritten markup.
    $new_content = parsing_image($post_id, $mxp_row['post_content']);
    // wp_update_post() expects slashed data and unslashes it before saving;
    // without wp_slash() every backslash in the content would be stripped.
    $update_attachment_post = array(
        'ID' => $post_id,
        'post_content' => wp_slash($new_content),
        'post_excerpt' => wp_slash(wp_trim_words($new_content, 200, '...')),
    );
    $upid = wp_update_post($update_attachment_post, true);
    if (!$upid || is_wp_error($upid)) {
        $reason = is_wp_error($upid) ? $upid->get_error_message() : 'unknown error';
        logger('update_post_error', "POST: {$post_id} | ERRMSG: {$reason}");
        echo $post_id . " => update failed: {$reason}" . PHP_EOL;
        continue;
    }
    echo $post_id . " => 完成匯入!" . PHP_EOL;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment