Skip to content

Instantly share code, notes, and snippets.

@cjming
Last active August 29, 2017 18:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cjming/189474dea51c7825580eb251f38d130b to your computer and use it in GitHub Desktop.
Save cjming/189474dea51c7825580eb251f38d130b to your computer and use it in GitHub Desktop.
Drush command to move files into a custom parent and child subfolders.
<?php
/**
* @file
* Drush command to move files into a custom parent and child subfolders.
*/
/**
 * Implements hook_drush_command().
 *
 * Declares the "migration_filepath_custom" command: one positional argument
 * (the bucket count) plus the "source" and "target" options, executed by
 * drush_migration_filepath_custom() at the DRUSH_BOOTSTRAP_DRUPAL_FULL
 * bootstrap level.
 */
function migration_filepath_custom_drush_command() {
  // The bucket count is the only positional argument.
  $arguments = array(
    'number' => 'Number of buckets to create under the parent folder.',
  );

  // The directories are options rather than arguments so the source can be
  // omitted if needed (e.g. files are just directly under
  // sites/default/files).
  $options = array();
  $options['source'] = array(
    'description' => 'The directory under files where the to be relocated files are.',
    'example_value' => 'field/image',
  );
  $options['target'] = array(
    'description' => 'The target directory where the files will be moved to and restructured.',
    'example_value' => 'pictures',
  );

  // Usage examples shown in the command help.
  $examples = array(
    'drush migration_filepath_custom' => 'Moves all files located in a source subfolder to iterated style subdirectory.',
  );

  return array(
    'migration_filepath_custom' => array(
      'description' => 'Move files to custom subdirectories',
      'callback' => 'drush_migration_filepath_custom',
      'arguments' => $arguments,
      'options' => $options,
      'examples' => $examples,
      'bootstrap' => DRUSH_BOOTSTRAP_DRUPAL_FULL,
    ),
  );
}
/**
 * Command callback to perform the file migration.
 *
 * Finds every managed image whose URI points directly into the source
 * directory (public files only), moves it into a numbered bucket folder
 * ("<child>_1", "<child>_2", ...) below the target directory and rewrites the
 * matching file_managed.uri value.
 *
 * The two optional options are the source and target directories. Both are
 * taken relative to the public files directory, e.g. sites/default/files.
 * The bucket folder prefix ("<child>") is the source directory with "/"
 * replaced by "-"; when no target is given it defaults to "<child>_parent".
 *
 * @param int $number
 *   Number of buckets to spread the files over. Must be at least 1. Each
 *   bucket receives at most ceil(total / $number) files.
 */
function drush_migration_filepath_custom($number = 10) {
  // Command line arguments arrive as strings, and a bucket count below one
  // would otherwise end in a division by zero.
  if (!is_numeric($number) || (int) $number < 1) {
    drush_log(dt('The number of buckets must be a positive integer.'), 'error');
    return;
  }
  $number = (int) $number;

  // Source directory to be cleaned up.
  // All images in this directory will be relocated.
  $source_directory = rtrim(drush_get_option('source', ''), '/');
  // Directory to place the new structure under.
  // If does not exist will be created.
  $target_directory = rtrim(drush_get_option('target', ''), '/');
  // The bucket prefix is always derived from the source, so it is defined
  // regardless of whether a target was passed on the command line.
  $target_child_directory = str_replace('/', '-', $source_directory);
  if ($target_directory === '') {
    $target_directory = $target_child_directory . '_parent';
  }

  // Regular expression to find files in the source directory.
  // For now assume public files only.
  $extensions = array('jpeg', 'jpg', 'JPG', 'gif', 'png');
  // Finds anything that does not contain "/", i.e. files located directly in
  // the source directory.
  $file_pattern = '[^\/]*';
  // Note, in some instances drupal places under public:/// (three slashes)
  // even when no folder specified. Reason for this is not known yet.
  // The directory is escaped so characters such as "." are matched literally.
  $source_pattern = $source_directory !== '' ? preg_quote($source_directory, '/') . '\/' : '';
  // Built once and used both for the database lookup and for extracting the
  // file name below.
  $regex = '^public:\/\/' . $source_pattern . '(' . $file_pattern . ')\.(' . implode('|', $extensions) . ')$';

  // Query the database for files that match this pattern.
  $filetypes = array('image/jpeg', 'image/jpg', 'image/gif', 'image/png');
  $files = db_select('file_managed', 'f')
    ->fields('f', array('fid', 'uri'))
    ->condition('filemime', $filetypes, 'IN')
    ->condition('uri', $regex, 'REGEXP')
    ->execute()
    ->fetchAll();
  $total_files = count($files);
  drush_print(dt('@count entries are to be moved.', array('@count' => $total_files)));
  if ($total_files === 0) {
    return;
  }

  // Divide total number of files into n buckets.
  $files_per_bucket = (int) ceil($total_files / $number);

  // Number of files moved so far; this alone decides the bucket of the next
  // file, so skipped or failed files never advance the bucket.
  $moved = 0;
  // Bucket whose directory is known to exist (0 = none prepared yet).
  $prepared_bucket = 0;
  $target_directory_for_file = '';
  // Keep lists of files that have not been moved.
  $files_missing = array();
  $files_failed = array();

  // Loop thru files and move them.
  foreach ($files as $file) {
    // Note, $file->filename can be the SAME for different uri-s, so the name
    // is taken from the URI instead.
    if (!preg_match('/' . $regex . '/i', $file->uri, $matches)) {
      $files_failed[] = $file->uri;
      continue;
    }
    $filename = $matches[1] . '.' . $matches[2];

    // If the file has already been moved, or does not exist in the
    // filesystem, move on.
    $file_wrapper = file_stream_wrapper_get_instance_by_uri($file->uri);
    if (FALSE === $file_wrapper->url_stat($file->uri, STREAM_URL_STAT_QUIET)) {
      $files_missing[] = $file->uri;
      drush_log("File entry in the database does not exist on the filesystem.", 'notice');
      continue;
    }

    // Switch to the next bucket once the previous one holds
    // $files_per_bucket files, and make sure its directory exists.
    $bucket = (int) floor($moved / $files_per_bucket) + 1;
    if ($bucket !== $prepared_bucket) {
      $bucket_directory = $target_directory . '/' . $target_child_directory . '_' . $bucket;
      $bucket_uri = file_build_uri($bucket_directory);
      if (!_migration_filepath_custom_ensure_directory($bucket_uri)) {
        drush_print(dt('cannot make @newdir', array('@newdir' => $bucket_uri)));
        $files_failed[] = $file->uri;
        continue;
      }
      $prepared_bucket = $bucket;
      $target_directory_for_file = $bucket_directory;
    }

    // Move the file into its bucket.
    $new_uri = file_build_uri($target_directory_for_file . '/' . $filename);
    if (!$file_wrapper->rename($file->uri, $new_uri)) {
      $files_failed[] = $file->uri;
      continue;
    }

    // Update the file_managed table and track progress, count.
    db_update('file_managed')
      ->fields(array('uri' => $new_uri))
      ->condition('fid', $file->fid)
      ->execute();
    $moved++;
    drush_print(round(($moved / $total_files) * 100) . '%');
  }

  // Output the totals, including files that couldn't be moved.
  drush_print(dt('(@moved/@total) files moved.', array('@moved' => $moved, '@total' => $total_files)));
  if ($files_missing) {
    drush_print(dt('@count files do not exist in the filesystem.', array('@count' => count($files_missing))));
  }
  if ($files_failed) {
    drush_print(dt('@count files could not be moved.', array('@count' => count($files_failed))));
  }
}

/**
 * Makes sure the directory behind a stream wrapper URI exists.
 *
 * An already existing directory counts as success, so the command can be run
 * again against a target that was (partially) populated before.
 *
 * @param string $uri
 *   URI of the directory, e.g. "public://pictures/pictures_1".
 *
 * @return bool
 *   TRUE if the directory exists or was created, FALSE otherwise.
 */
function _migration_filepath_custom_ensure_directory($uri) {
  if (is_dir($uri)) {
    return TRUE;
  }
  $wrapper = file_stream_wrapper_get_instance_by_uri($uri);
  // The third argument is the options bitmask; TRUE evaluates to
  // STREAM_MKDIR_RECURSIVE, so missing parent folders are created too.
  return $wrapper && $wrapper->mkdir($uri, 0755, TRUE);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment