Skip to content

Instantly share code, notes, and snippets.

@nczz
Last active May 31, 2019 12:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nczz/81c3a7a699e4d598cea9a85168bb959b to your computer and use it in GitHub Desktop.
Save nczz/81c3a7a699e4d598cea9a85168bb959b to your computer and use it in GitHub Desktop.
WordPress Headless Replace Images From External Source
<?php
/**
** Blogger 文章匯入使用內建匯入工具,不過圖片抓取機制鳥到爆,預設只會抓取文章封面圖,其他文章內的圖片都需要抓取的話就要跑這隻!
**/
// Bootstrap WordPress. `require` (instead of `include`) stops the script right here with a
// clear error when wp-load.php cannot be found, rather than failing later on the first
// undefined WordPress function.
require 'wp-load.php';
set_time_limit(0);
ini_set('memory_limit', '256M');

/*
 * Image-handling overrides used for the duration of this import run:
 *  - 'intermediate_image_sizes' is emptied so no intermediate sizes are generated for
 *    the attachments this script creates;
 *  - 'wp_get_attachment_image_src' always resolves to the full-size image (or, when an
 *    icon was requested and no image is available, to the mime-type icon).
 */
$mxp_register_image_filters = function () {
    add_filter('intermediate_image_sizes', '__return_empty_array');
    add_filter('wp_get_attachment_image_src', function ($image, $attachment_id, $size, $icon) {
        // Ignore the requested size and always ask for the full-size image.
        $image = image_downsize($attachment_id, 'full');
        if ($image) {
            return $image;
        }
        // No image available: fall back to the mime-type icon, but only when the caller
        // asked for icons and the icon's dimensions can actually be read.
        if ($icon && $src = wp_mime_type_icon($attachment_id)) {
            /** This filter is documented in wp-includes/post.php */
            $icon_dir = apply_filters('icon_dir', ABSPATH . WPINC . '/images/media');
            $src_file = $icon_dir . '/' . wp_basename($src);
            // getimagesize() warns on unreadable files; a false result is handled below.
            $dimensions = @getimagesize($src_file);
            if ($dimensions && $dimensions[0] && $dimensions[1]) {
                $image = array($src, $dimensions[0], $dimensions[1]);
            }
        }
        return $image;
    }, 999, 4);
};

// Loading wp-load.php runs the full WordPress bootstrap, during which 'after_setup_theme'
// fires. A callback hooked to that action afterwards would never run, so register the
// filters immediately when the action has already happened.
if (did_action('after_setup_theme')) {
    $mxp_register_image_filters();
} else {
    add_action('after_setup_theme', $mxp_register_image_filters);
}
/**
 * Appends a single "<label> => <message>" line to replace.log, which lives in the same
 * directory as this script.
 *
 * @param string $file Label identifying the source of the entry.
 * @param string $data Message text to record.
 */
function logger($file, $data) {
    $log_path = __DIR__ . "/replace.log";
    $entry = $file . " => " . $data . PHP_EOL;
    // Message type 3 makes error_log() append the entry to the given file.
    error_log($entry, 3, $log_path);
}
/**
 * Downloads $url to the local path $dest, reusing an earlier download when one exists.
 *
 * Failures are logged through logger() and never leave an empty or partial file at
 * $dest, so a caller can detect a failed download by checking whether the file exists.
 *
 * @param string $url  Remote address to fetch.
 * @param string $dest Local filesystem path to write to.
 * @return string $dest, returned whether or not the download succeeded.
 */
function download_source($url, $dest) {
    $file_name = $dest;
    // Reuse a previous download, but not an empty leftover from an aborted transfer.
    if (is_file($file_name) && filesize($file_name) > 0) {
        return $file_name;
    }
    $fp = fopen($file_name, 'w');
    if ($fp === false) {
        logger('request_download_source_error', "LINK: {$url} | ERRMSG: cannot open {$file_name} for writing");
        return $file_name;
    }
    $ch = curl_init($url);
    if ($ch === false) {
        fclose($fp);
        unlink($file_name);
        logger('request_download_source_error', "LINK: {$url} | ERRMSG: curl_init failed");
        return $file_name;
    }
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36');
    // Send the image URL itself as referer.
    curl_setopt($ch, CURLOPT_REFERER, $url);
    // Treat HTTP status >= 400 as a failure instead of saving the error page.
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // Stream the response body straight into the destination file.
    curl_setopt($ch, CURLOPT_FILE, $fp);
    $succeeded = curl_exec($ch);
    $error_message = curl_error($ch);
    curl_close($ch);
    fclose($fp);
    if ($succeeded === false || $error_message !== '') {
        logger('request_download_source_error', "LINK: {$url} | ERRMSG: $error_message");
        // Remove the truncated file so it is neither mistaken for a finished download
        // on the next run nor processed by the caller.
        if (is_file($file_name)) {
            unlink($file_name);
        }
    }
    return $file_name;
}
/**
 * Localizes the external images referenced by a post's HTML.
 *
 * Every <img> whose src is an absolute http(s) URL that does not contain site_url() is
 * downloaded into the uploads base directory and its src is rewritten to the local copy.
 * The resulting image URLs are handed to mxp_import_image(), and finally all attributes
 * are stripped from <p> tags and all <div> wrappers are removed.
 *
 * An image that cannot be downloaded, or that is not recognized as an image, keeps its
 * original tag and is not passed on to mxp_import_image().
 *
 * @param int|string $post_id ID of the post the content belongs to.
 * @param string     $content Post HTML.
 * @return string The rewritten HTML (the unmodified input if the image pass fails).
 */
function parsing_image($post_id, $content) {
    $img_pattern = '/<img\s+.*?src=[\"\'](http[s]{0,1}\:\/\/?[^\"\' >]*)[\"\']?[^>]*>/i';
    $upload_dir = wp_upload_dir();
    $site_url = site_url();
    $failed_urls = array();

    $localized = preg_replace_callback(
        $img_pattern,
        function ($res) use ($upload_dir, $site_url, &$failed_urls) {
            $tag = $res[0];
            $url = $res[1];
            // Images already served from this site are left alone.
            if (strpos($url, $site_url) !== false) {
                return $tag;
            }
            // Derive a stable file name from everything after the scheme. The limit of 2
            // keeps the whole remainder, so URLs containing further colons (e.g. a port)
            // no longer collapse onto the same hash.
            $flattened = str_replace(array('/', '.', '?', '&', '%', ';', '='), '-', $url);
            $parts = explode(':', $flattened, 2);
            $tmp_name = md5($parts[1]);
            $file_path = "{$upload_dir['basedir']}/{$tmp_name}";

            download_source($url, $file_path);
            // getimagesize() warns on unreadable data; a false result is handled below.
            $info = is_file($file_path) ? @getimagesize($file_path) : false;
            if ($info === false) {
                // Failed download or not an image: keep pointing at the original source
                // instead of rewriting the post to a broken local file.
                if (is_file($file_path)) {
                    unlink($file_path);
                }
                logger('parsing_image_error', "LINK: {$url} | ERRMSG: download failed or file is not an image");
                $failed_urls[] = $url;
                return $tag;
            }

            // Pick the extension from the detected mime type; anything that is not
            // gif/jpeg/png is stored as .jpg.
            $extensions = array('image/gif' => '.gif', 'image/jpeg' => '.jpg', 'image/png' => '.png');
            $mime = isset($info['mime']) ? $info['mime'] : '';
            $extension = isset($extensions[$mime]) ? $extensions[$mime] : '.jpg';
            if (!rename($file_path, $file_path . $extension)) {
                logger('parsing_image_error', "LINK: {$url} | ERRMSG: cannot rename {$file_path}");
                $failed_urls[] = $url;
                return $tag;
            }
            $tmp_name .= $extension;

            // Swap the URL inside the matched tag and leave the rest of the tag untouched.
            // A DOMDocument::loadHTML() round trip would decode the fragment as ISO-8859-1
            // and garble non-ASCII attribute values such as alt text.
            return str_replace($url, "{$upload_dir['baseurl']}/{$tmp_name}", $tag);
        },
        $content
    );
    if ($localized === null) {
        // PCRE error: returning null here would wipe the post content, so bail out.
        logger('parsing_image_error', "POST: {$post_id} | ERRMSG: preg_replace_callback failed");
        return $content;
    }
    $content = $localized;

    preg_match_all($img_pattern, $content, $matches);
    // Only hand over images that now live on this site; URLs whose download failed are
    // still external and have no local file to attach.
    $images = array_values(array_diff($matches[1], $failed_urls));
    //logger('parsing_image', print_r($matches, true));
    mxp_import_image($post_id, $images);

    // Strip attributes from <p> and <div>. The lookahead keeps the patterns from also
    // matching other tags that merely start with the same letters (<pre>, <param>, ...).
    $content = preg_replace('/<p(?=[\s>])[^>]*>/', "<p>", $content);
    $content = preg_replace('/<div(?=[\s>])[^>]*>/', "<div>", $content);
    $remove_divs = array("<div>", "</div>");
    $content = str_replace($remove_divs, "", $content);
    return $content;
}
/**
 * Attaches the given image URLs to a post as media-library attachments.
 *
 * For each URL the matching file under the uploads directory is looked up. If an
 * attachment whose guid contains the file name already exists, it is re-parented to
 * $post_id; otherwise a new attachment is created for the file. The first newly created
 * image attachment becomes the post's featured image.
 *
 * @param int|string $post_id ID of the post the images belong to.
 * @param string[]   $imgs    Image URLs found in the post content.
 * @return void
 */
function mxp_import_image($post_id, $imgs) {
    require_once ABSPATH . 'wp-admin/includes/media.php';
    require_once ABSPATH . 'wp-admin/includes/file.php';
    require_once ABSPATH . 'wp-admin/includes/image.php';
    global $wpdb;
    $filename = array();
    $upload_file = array();
    $upload_dir = wp_upload_dir();
    $site_url = site_url();

    foreach ($imgs as $img_url) {
        $img_filename = str_replace($upload_dir['baseurl'] . "/", "", $img_url);
        $img_dir_path = str_replace($upload_dir['baseurl'], $upload_dir['basedir'], $img_url);
        $is_external = strpos($img_url, $site_url) === false;

        if (file_exists($img_dir_path) || !$is_external) {
            // Look for an existing attachment for this file. The lookup is restricted to
            // attachments and the name is escaped so that '_' / '%' in a file name are
            // not treated as LIKE wildcards.
            $like = '%' . $wpdb->esc_like(pathinfo($img_filename, PATHINFO_FILENAME)) . '%';
            $existing_id = $wpdb->get_var(
                $wpdb->prepare(
                    "SELECT ID FROM $wpdb->posts WHERE post_type = 'attachment' AND guid LIKE %s",
                    $like
                )
            );
            // Only touch the database when a row was actually found; calling
            // wp_update_post() without a valid ID must be avoided.
            if ($existing_id) {
                echo "已有 {$img_filename} 此媒體。位在: {$img_dir_path}" . PHP_EOL;
                $media_post = wp_update_post(array(
                    'ID' => (int) $existing_id,
                    'post_parent' => $post_id,
                ), true);
                if (!is_wp_error($media_post)) {
                    continue;
                }
            }
        }
        // No usable attachment yet: queue the file for import.
        $filename[] = $img_filename;
        $upload_file[] = $img_dir_path;
    }
    //logger('mxp_import_image', print_r($filename, true));

    // Create attachments for the queued files and attach them to the post.
    $set_feature_image = true;
    foreach ($upload_file as $i => $file_path) {
        $name = isset($filename[$i]) ? $filename[$i] : "";
        if ($file_path == "" || $name == "") {
            continue;
        }
        // An attachment needs a real local file; skip URLs that were never downloaded.
        if (!is_file($file_path)) {
            logger('mxp_import_image_error', "POST: {$post_id} | ERRMSG: no local file for {$file_path}");
            continue;
        }
        $wp_filetype = wp_check_filetype($name, null);
        $attachment = array(
            'post_mime_type' => $wp_filetype['type'],
            'post_parent' => $post_id,
            'post_title' => preg_replace('/\.[^.]+$/', '', $name),
            'post_content' => '',
            'post_status' => 'inherit',
        );
        $attachment_id = wp_insert_attachment($attachment, $file_path, $post_id);
        // wp_insert_attachment() reports failure as 0 unless asked for a WP_Error.
        if (!$attachment_id || is_wp_error($attachment_id)) {
            logger('mxp_import_image_error', "POST: {$post_id} | ERRMSG: cannot create attachment for {$file_path}");
            continue;
        }
        // Generate and store the attachment metadata.
        $attachment_data = wp_generate_attachment_metadata($attachment_id, $file_path);
        wp_update_attachment_metadata($attachment_id, $attachment_data);
        // Use the first imported image as the post's featured image.
        $type = explode("/", (string) $wp_filetype['type']);
        if ($set_feature_image && $type[0] == 'image') {
            set_post_thumbnail($post_id, $attachment_id);
            $set_feature_image = false;
        }
        echo "匯入 {$name} 媒體。位在: {$file_path}" . PHP_EOL;
    }
}
// Main run: localize the images of every published post and save the rewritten content.
global $wpdb;
$querystr = "SELECT ID,post_content FROM $wpdb->posts WHERE $wpdb->posts.post_status = 'publish' AND $wpdb->posts.post_type = 'post' AND $wpdb->posts.post_date < NOW() ORDER BY $wpdb->posts.post_date DESC";
// This code runs in the global scope, so the loop variables are deliberately NOT named
// $posts / $post: those names are WordPress's own globals, and core functions such as
// get_post() fall back to $GLOBALS['post'] when they are given an empty ID.
$mxp_posts = (array) $wpdb->get_results($querystr, ARRAY_A);
foreach ($mxp_posts as $mxp_post) {
    $post_id = $mxp_post['ID'];
    // Import the images and get the rewritten content back.
    $new_content = parsing_image($post_id, $mxp_post['post_content']);
    $update_attachment_post = array(
        'ID' => $post_id,
        'post_content' => $new_content,
        'post_excerpt' => wp_trim_words($new_content, 200, '...'),
    );
    // wp_update_post() expects slashed data and unslashes it before saving; the content
    // here was read raw from the database, so slash it first or every backslash in the
    // post would be stripped.
    $upid = wp_update_post(wp_slash($update_attachment_post), true);
    if (is_wp_error($upid)) {
        logger('update_post_error', "POST: {$post_id} | ERRMSG: " . $upid->get_error_message());
        continue;
    }
    echo $post_id . " => 完成匯入!" . PHP_EOL;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment