A PHP script for importing a directory of images into WordPress, using a CSV file with one row of data per image.
Quite specific to my own needs, but might be useful to others.
See the comments in wp_import_images.php
for more info.
images/* | |
*.csv |
<?php | |
/**
 * CSV file parser.
 *
 * Parses CSV text one character at a time into a 2D array, $data. A field
 * may be wrapped in the enclosure character, in which case it can contain
 * the delimiter, newlines, and doubled enclosure characters (which are
 * collapsed to a single one).
 *
 * Currently the string matching doesn't work
 * if the output encoding is not ASCII or UTF-8.
 *
 * From http://minghong.blogspot.co.uk/2006/07/csv-parser-for-php.html
 */
class CsvFileParser
{
    public $delimiter;      // Field delimiter
    public $enclosure;      // Field enclosure character
    public $inputEncoding;  // Input character encoding
    public $outputEncoding; // Output character encoding
    public $data;           // CSV data as 2D array

    /**
     * Constructor. Sets comma-delimited, double-quote-enclosed, ISO-8859-1
     * defaults and an empty data table.
     */
    public function __construct()
    {
        $this->delimiter = ",";
        $this->enclosure = '"';
        $this->inputEncoding = "ISO-8859-1";
        $this->outputEncoding = "ISO-8859-1";
        $this->data = array();
    }

    /**
     * Backwards-compatible alias for the old PHP 4-style constructor.
     * A method named after its class is no longer a constructor in PHP 8,
     * so the real initialisation lives in __construct(); this is kept only
     * so that explicit calls (e.g. parent::CsvFileParser()) still work.
     */
    public function CsvFileParser()
    {
        $this->__construct();
    }

    /**
     * Parse CSV from file
     * @param filename The CSV filename
     * @param hasBOM Using BOM or not
     * @return Success or not
     */
    public function ParseFromFile( $filename, $hasBOM = false )
    {
        if ( !is_readable($filename) )
        {
            return false;
        }

        // is_readable() is also true for directories, and the read itself
        // can still fail, so check the result before parsing it.
        $content = file_get_contents($filename);
        if ( $content === false )
        {
            return false;
        }

        return $this->ParseFromString( $content, $hasBOM );
    }

    /**
     * Parse CSV from string. Parsed rows are appended to $this->data.
     * @param content The CSV string
     * @param hasBOM Using BOM or not
     * @return Success or not (false if the encoding conversion fails)
     */
    public function ParseFromString( $content, $hasBOM = false )
    {
        $content = iconv( $this->inputEncoding, $this->outputEncoding, (string) $content );
        if ( $content === false )
        {
            // Conversion failed; don't pretend we parsed an empty file.
            return false;
        }

        // Normalise all line endings to "\n".
        $content = str_replace( "\r\n", "\n", $content );
        $content = str_replace( "\r", "\n", $content );

        if ( $hasBOM ) // Remove the BOM (first 3 bytes)
        {
            // Cast because substr() returns false on short input before PHP 8.
            $content = (string) substr( $content, 3 );
        }

        // Make sure it always ends with a newline, so the final row is
        // flushed. substr() is safe on an empty string, unlike indexing.
        if ( substr( $content, -1 ) !== "\n" )
        {
            $content .= "\n";
        }

        // Parse the content character by character
        $row = array( "" );
        $idx = 0;
        $quoted = false;
        $length = strlen( $content );
        for ( $i = 0; $i < $length; $i++ )
        {
            $ch = $content[$i];

            // Every enclosure character flips the quoted state; a doubled
            // enclosure flips it twice and so leaves it unchanged.
            if ( $ch === $this->enclosure )
            {
                $quoted = !$quoted;
            }

            if ( $ch === "\n" && !$quoted )
            {
                // End of line: append the cleaned-up row to the table.
                $this->data[] = $this->unquoteRow( $row );
                $row = array( "" );
                $idx = 0;
            }
            else if ( $ch === $this->delimiter && !$quoted )
            {
                // End of field
                $row[++$idx] = "";
            }
            else
            {
                // Inside the field (enclosure characters included)
                $row[$idx] .= $ch;
            }
        }
        return true;
    }

    /**
     * Strips the surrounding enclosure characters from each field that
     * starts with one, and collapses doubled enclosures into single ones.
     * @param row Array of raw field strings
     * @return Array of cleaned field strings
     */
    private function unquoteRow( $row )
    {
        $doubled = str_repeat( $this->enclosure, 2 );
        foreach ( $row as $k => $field )
        {
            if ( $field !== "" && $field[0] === $this->enclosure )
            {
                $field = (string) substr( $field, 1, strlen($field) - 2 );
            }
            $row[$k] = str_replace( $doubled, $this->enclosure, $field );
        }
        return $row;
    }
}
?> |
<?php | |
/** | |
* A script for importing a load of images, and data from a CSV, into WordPress. | |
* It creates a new 'attachment' Post for each image. | |
* It can also add custom metadata about the image. | |
* | |
* Run it like: | |
* | |
* $ php wp_import_images.php | |
* | |
* | |
* Assumptions: | |
* * The CSV file has one row per image. | |
* * One of the CSV columns refers to a particular image to be uploaded. | |
* | |
* To re-use this you'd need to make a few changes based on your needs: | |
* | |
* 1. Change the config variables below as needed. | |
* | |
* 2. The $COLUMNS should be changed to make sense depending on your CSV's | |
* structure. | |
* | |
* 3. Change how we determine whether a post already exists in the database, | |
* in create_posts(). | |
* | |
* 4. In create_posts() alter the bit that creates the $post_data array. You | |
* might want to change, add or delete items from this. The keys of the | |
* $data array there correspond to the names in $COLUMNS. | |
* | |
* 5. In get_original_file() alter how you find the correct file for a | |
* particular row of data. In our case it's based on a column we've called | |
* 'ref'. You'll need to fiddle with how the image is identified. e.g. | |
* if you had a column in your CSV that you've called 'file' you could | |
* replace the line starting `if (strpos(...` with:
* if ($filename == $data['file']) { | |
* | |
* 6. You might need to alter stuff in create_attachment() if you have | |
* different requirements for what gets saved with the Post. | |
* | |
* 7. In add_post_metadata() we add our custom metadata fields. You'll want to | |
* either delete all the custom ones (leave the _wp_attached_file line) or | |
* add your own if you have different ones. | |
*/ | |
////////////////////////////////////////////////////////////////////// | |
// CONFIGURATION | |
// All paths relative to this script. | |
// Setting this to TRUE won't move any files or save anything to the database. | |
// It will output a bit of debug code that helps ensure everything is in the | |
// right place before running properly. | |
$DRY_RUN = true;

// ID of the WordPress user recorded as the author of every created Post.
$AUTHOR_ID = 1;

// The WordPress directory, relative to this script.
$WP_DIR = '../public/wp/';

// The CSV file to import, relative to this script.
$CSV_FILE = './import.csv';

// The directory holding the images to be uploaded, relative to this script.
$IMAGE_SOURCE_DIR = './images/';

// Number of rows at the top of the CSV to ignore.
$HEADER_ROWS = 1;

// A name for each CSV column, in column order. The rest of the script
// refers to each piece of row data by these names.
$COLUMNS = array(
    'ref', 'title', 'photographer', 'publisher', 'date_taken',
    'description', 'source', 'restrictions', 'no_pic'
);

// New Posts are added to these term IDs within this taxonomy.
// Set $TAXONOMY to an empty string to skip this step.
$TAXONOMY = 'media_category';
$TERM_IDS = array(991);

// WordPress's wp-load.php, relative to this script.
$WP_LOAD_PATH = "{$WP_DIR}wp-load.php";

// WordPress's admin image.php, relative to this script.
$WP_IMAGE_PATH = "{$WP_DIR}wp-admin/includes/image.php";

// The WordPress uploads directory, relative to this script.
// The year and month get appended to this further down.
$WP_UPLOADS_DIR = "{$WP_DIR}wp-content/uploads/";
////////////////////////////////////////////////////////////////////// | |
// SETUP... | |
// The CSV parser class used by load_csv().
require_once('csv_file_parser.php');

// Stop early, with a clear message, if any of the inputs are missing.
if (! file_exists($CSV_FILE)) {
    die(sprintf("Can't find the CSV file at '%s'.\n", $CSV_FILE));
}

if (! file_exists($IMAGE_SOURCE_DIR)) {
    die(sprintf(
        "Can't find image source directory at '%s'.\n", $IMAGE_SOURCE_DIR));
} else if (! is_dir($IMAGE_SOURCE_DIR)) {
    die(sprintf(
        "\$IMAGE_SOURCE_DIR isn't a directory: %s\n", $IMAGE_SOURCE_DIR));
}

// Bootstrap WordPress, then confirm it really loaded.
require_once($WP_LOAD_PATH);

if (! function_exists('wp')) {
    die("Sorry, looks like WordPress isn't loaded.\n");
}

// image.php provides wp_generate_attachment_metadata(), used in create_posts().
require_once($WP_IMAGE_PATH);

if (! function_exists('wp_generate_attachment_metadata')) {
    die("Sorry, looks like the WordPress image.php file isn't loaded.\n");
}

// eg 'public/wp-content/uploads/2017/09/'
$IMAGE_DESTINATION_DIR = sprintf('%s%s/', $WP_UPLOADS_DIR, date('Y/m'));

if (! file_exists($IMAGE_DESTINATION_DIR)) {
    if ($DRY_RUN !== FALSE) {
        // A dry run must not change anything on disk, so only report.
        printf("Would create uploads directory '%s'.\n", $IMAGE_DESTINATION_DIR);
    } else {
        printf("Creating uploads directory '%s'.\n", $IMAGE_DESTINATION_DIR);
        // Recursive, because the year directory may not exist yet either
        // (e.g. the first import of a new year).
        if (! mkdir($IMAGE_DESTINATION_DIR, 0777, true) && ! is_dir($IMAGE_DESTINATION_DIR)) {
            printf("WARNING: couldn't create uploads directory '%s'.\n", $IMAGE_DESTINATION_DIR);
        }
    }
}
////////////////////////////////////////////////////////////////////// | |
// START! | |
// Say clearly which mode we're running in before doing anything else.
print($DRY_RUN
    ? "\nThis is a DRY RUN.\n\n"
    : "\nTHIS IS NOT A DRY RUN. THIS IS REALLY HAPPENING.\n\n");

// The filenames in the source directory; get_original_file() reads this
// through `global`.
$IMAGES = scandir($IMAGE_SOURCE_DIR);

// Read the CSV (minus its header rows), then process every row.
$rows = load_csv($CSV_FILE, $HEADER_ROWS);

create_posts($rows, $AUTHOR_ID);
////////////////////////////////////////////////////////////////////// | |
// THE FUNCTIONS | |
/**
 * Loads a CSV file at $filepath and returns an array of arrays of data.
 *
 * The file is read as UTF-8 and left as UTF-8.
 *
 * $filepath is the path to the CSV file.
 * $header_rows is the number of rows to drop from the top of the file.
 *
 * Returns an array of rows, each one an array of column strings. If the
 * file can't be read or parsed, a message is printed and an empty array is
 * returned.
 */
function load_csv($filepath, $header_rows=0) {
    $parser = new CsvFileParser();
    $parser->inputEncoding = 'UTF-8';
    $parser->outputEncoding = 'UTF-8';

    // ParseFromFile() reports failure through its return value; without
    // this check a failed read looks exactly like an empty CSV.
    if (! $parser->ParseFromFile($filepath)) {
        printf("Couldn't read or parse the CSV file at '%s'.\n", $filepath);
        return array();
    }

    return array_slice($parser->data, $header_rows);
}
/**
 * Does it all with the provided data.
 * Creates the new posts, uploads the images, and associates them together.
 *
 * For each row: finds the matching source image, skips the row if no image
 * is found or if a post with the same ref already exists, and otherwise
 * (unless this is a dry run) copies the image into the uploads directory,
 * creates the attachment post, generates its image metadata, and adds our
 * custom metadata and taxonomy terms.
 *
 * $rows is an array of rows, each one an array of column data.
 * $author_id is the numeric ID of the WordPress author to use as the creator
 * of the posts.
 */
function create_posts($rows, $author_id) {
    global $DRY_RUN, $wpdb;

    $uploads = wp_upload_dir();

    foreach ($rows as $row) {
        // Get an associative array of data from the column data in $row:
        $data = row_to_data($row);

        // Rows with no ref (e.g. blank lines in the CSV) can't be matched
        // to an image or checked for duplicates, so there's nothing to do.
        if (! isset($data['ref']) || $data['ref'] === '') {
            print("Row has no ref, skipping.\n");
            continue;
        }

        // Some of the titles have periods on the end; get rid of them.
        // Short rows may lack these columns entirely, so default to ''.
        $data['title'] = isset($data['title']) ? rtrim($data['title'], '.') : '';
        $description = isset($data['description']) ? $data['description'] : '';

        $filedata = get_original_file($data);

        printf("Ref: %s. ", $data['ref']);

        if ($filedata === FALSE) {
            print("No image found, skipping.\n");
            continue;
        }

        // See if this image exists already. We look for the ref
        // in the postmeta table. The %s placeholder is deliberately
        // unquoted: prepare() adds the quoting itself.
        $row_count = $wpdb->get_var(
            $wpdb->prepare("SELECT COUNT(*) FROM $wpdb->postmeta WHERE meta_key = '_janetgyford_ref' AND meta_value = %s ", $data['ref'])
        );

        if ($row_count > 0) {
            printf("Post exists, skipping: %s\n", $data['title']);
            continue;
        }

        if ($DRY_RUN !== FALSE) {
            print("This image and data would be imported.\n");
            continue;
        }

        // DO IT!

        $new_filename = sanitize_file_name($filedata['filename']);

        // $uploads['path'] is like
        // '/var/www/public/wp/wp-content/uploads/2017/10'
        $new_filepath = $uploads['path'] . '/' . $new_filename;

        // Put our file in the uploads directory. If that fails, don't
        // create a post that would point at a file which isn't there.
        if (! copy($filedata['filepath'], $new_filepath)) {
            printf("Couldn't copy '%s' to '%s', skipping.\n",
                                    $filedata['filepath'], $new_filepath);
            continue;
        }

        // What we use to create our new attachment post:
        $post_data = array(
            'author' => $author_id,
            'title' => $data['title'],
            'content' => $description,
            'guid' => $uploads['url'] . '/' . $new_filename,
            'mime_type' => $filedata['filetype']['type']
        );

        // Creates a new Post of type attachment:
        $post_id = create_attachment($post_data, $new_filepath);

        if (! $post_id) {
            printf("Couldn't create Post for: %s\n", $data['title']);
            continue;
        }

        printf("CREATED Post ID %s.\n", $post_id);

        if ($attach_data = wp_generate_attachment_metadata($post_id, $new_filepath)) {
            wp_update_attachment_metadata($post_id, $attach_data);
        }

        // Will be like '2017/10/my-new-filename.jpg'
        $attached_file = substr($uploads['subdir'], 1) . '/' . $new_filename;

        add_post_metadata($post_id, $data, $attached_file);

        add_taxonomy($post_id);

        // We want to leave at least one second between adding each
        // Post so that there are no duplicate post_dates, which
        // might cause problems if ordering by that.
        sleep(1);
    }
}
/**
 * Finds the source image belonging to a row of data.
 *
 * The row's 'ref' (like 'M10') is turned into a filename prefix by dropping
 * its first character, zero-padding the number to four digits and prefixing
 * a lowercase 'm' (giving 'm0010'). The first file in $IMAGES whose name
 * starts with that prefix, and does not continue with another digit, is
 * used.
 *
 * Returns an array of filename, filepath and filetype for that image.
 * If it doesn't exist, returns FALSE.
 */
function get_original_file($data) {
    global $IMAGES, $IMAGE_SOURCE_DIR;

    // No ref means nothing to look for; a failed scandir() leaves $IMAGES
    // as FALSE, which can't be iterated.
    if (! isset($data['ref']) || ! is_array($IMAGES)) {
        return FALSE;
    }

    // $data['ref'] is like 'M10'.
    // We want to look for a file starting with 'm0010'.
    $num = (int) substr($data['ref'], 1);     // 10
    $filestart = 'm' . sprintf("%04d", $num); // 'm0010'
    $start_length = strlen($filestart);

    // $IMAGES is an array of filenames in our source directory.
    foreach ($IMAGES as $filename) {
        if (strpos($filename, $filestart) !== 0) {
            continue;
        }

        // A digit straight after the prefix means a longer number, i.e. a
        // different ref: 'm00100.jpg' must not match ref 'M10'.
        $next = (string) substr($filename, $start_length, 1);
        if ($next !== '' && ctype_digit($next)) {
            continue;
        }

        // This filename matches our 'ref' so this is the one we need.
        $filepath = $IMAGE_SOURCE_DIR . $filename;
        return array(
            'filename' => $filename,
            'filepath' => $filepath,
            'filetype' => wp_check_filetype($filepath, null)
        );
    }

    return FALSE;
}
/**
 * Inserts a published attachment Post built from $post_data, with comments
 * and pings closed, and returns whatever wp_insert_attachment() returns
 * (the new post's ID on success).
 *
 * $post_data is an array with the keys 'author', 'title', 'content',
 * 'mime_type' and 'guid', plus an optional 'name' to use as the slug. If
 * 'name' is absent a slug is derived from the title.
 * $uploaded_path is accepted but not used here.
 */
function create_attachment($post_data, $uploaded_path) {
    // Use the supplied slug, or build one from the title (underscores are
    // turned into dashes first).
    $slug = array_key_exists('name', $post_data)
        ? $post_data['name']
        : sanitize_title_with_dashes(str_replace("_", "-", $post_data['title']));

    $attachment = array(
        'comment_status' => 'closed',
        'ping_status' => 'closed',
        'post_author' => $post_data['author'],
        'post_name' => $slug,
        'post_title' => $post_data['title'],
        'post_content' => $post_data['content'],
        'post_status' => 'publish',
        'post_type' => 'attachment',
        'post_mime_type' => $post_data['mime_type'],
        'guid' => $post_data['guid']
    );

    return wp_insert_attachment($attachment);
}
/**
 * Writes the postmeta rows for a newly created attachment: the one that
 * links the uploaded file to the post, followed by our custom fields.
 *
 * $post_id is the ID of the attachment post we created.
 * $data is our associative array of data from a row of the CSV.
 * $attached_file is the path to the file WITHIN the uploads directory (it has
 * no leading slash). e.g. '2017/10/my-new-filename.jpg'.
 */
function add_post_metadata($post_id, $data, $attached_file) {
    global $wpdb;

    // meta_key => meta_value, in the order the rows are inserted.
    $fields = array(
        // Associates the uploaded image with the post so that it shows up
        // in the admin when viewing it in Media:
        '_wp_attached_file' => $attached_file,

        // Our custom metadata:
        '_janetgyford_ref' => $data['ref'],
        // NOTE(review): this key is spelled differently from its siblings
        // ('_jantetgyford_'); confirm against whatever reads it before
        // changing it.
        '_jantetgyford_credit' => $data['photographer'],
        '_janetgyford_publisher' => $data['publisher'],
        '_janetgyford_date_taken' => $data['date_taken'],
        '_janetgyford_source' => $data['source'],
        '_janetgyford_restrictions' => $data['restrictions'],
        // An empty 'no_pic' column means the picture may be shown.
        '_janetgyford_visibility' => ($data['no_pic'] === '') ? 'show' : 'hide',
    );

    $postmeta_table = $wpdb->prefix . 'postmeta';

    foreach ($fields as $meta_key => $meta_value) {
        $wpdb->insert($postmeta_table, array(
            'post_id' => $post_id,
            'meta_key' => $meta_key,
            'meta_value' => $meta_value,
        ));
    }
}
/**
 * Sets the post's terms in the configured $TAXONOMY to $TERM_IDS.
 * Does nothing when $TAXONOMY is an empty string.
 *
 * $post_id is the ID of the post to assign terms to.
 */
function add_taxonomy($post_id) {
    global $TAXONOMY, $TERM_IDS;

    if ($TAXONOMY === '') {
        return;
    }

    // Final argument false: passed as wp_set_object_terms()'s append flag.
    wp_set_object_terms($post_id, $TERM_IDS, $TAXONOMY, false);
}
/**
 * Takes an array of column data and returns an associative array with the
 * same data, keyed by the names in $COLUMNS and with whitespace trimmed.
 *
 * Every name in $COLUMNS is always present in the result: columns missing
 * from a short row come back as ''. Columns beyond the end of $COLUMNS have
 * no name and are ignored.
 */
function row_to_data($row) {
    global $COLUMNS;

    $data = array();

    // Iterate over the column names rather than the row, so that short rows
    // still produce every key and long rows can't index past $COLUMNS.
    foreach ($COLUMNS as $n => $name) {
        $data[$name] = isset($row[$n]) ? trim($row[$n]) : '';
    }

    return $data;
}