Skip to content

Instantly share code, notes, and snippets.

@girishmuraly
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save girishmuraly/9393882 to your computer and use it in GitHub Desktop.
Save girishmuraly/9393882 to your computer and use it in GitHub Desktop.
Drush command to relocate files into folders based on unchanging file id. http://lechronicles.blogspot.co.uk/2014/03/improve-performance-organize-your-files.html
<?php
/**
 * Implements hook_drush_command().
 *
 * Declares the "migration_filepath" command (alias "mf"), handled by
 * drush_migration_filepath(), together with its options and usage example.
 */
function migration_filepath_drush_command() {
  // Options are used rather than arguments so that the source can be left out
  // when not needed (e.g. files sitting directly under sites/default/files).
  $command_options = array(
    'source' => array(
      'description' => 'The directory under files where the to be relocated files are.',
      'example_value' => 'field/image',
    ),
    'target' => array(
      'description' => 'The target directory where the files will be moved to and restructured.',
      'example_value' => 'pictures',
    ),
    'limit' => array(
      'description' => 'Use this option to explicitly limit the number of files to move. Do not use this option if you want to move all files.',
      'example_value' => '15',
    ),
    'list-info' => 'Print total number of files remaining to be moved and other helpful info and exit.',
  );

  $usage_examples = array(
    'drush migration_filepath' => 'Moves all files located in the public file directory to fid based directory structure.',
  );

  $command = array(
    'description' => "Move files to subdirectories",
    'callback' => 'drush_migration_filepath',
    'aliases' => array('mf'),
    'options' => $command_options,
    'examples' => $usage_examples,
    'bootstrap' => DRUSH_BOOTSTRAP_DRUPAL_FULL,
  );

  return array('migration_filepath' => $command);
}
/**
 * Command callback to perform the file migration.
 *
 * Looks up image files (jpeg, jpg, gif, png) in the file_managed table whose
 * URI sits directly inside the source directory of the public:// scheme. Each
 * file found is moved to <target>/<fid based directory>/<filename>, its URI is
 * updated in file_managed, and an "old path -> new path" line is appended to
 * image-redirects.map in the public files directory.
 *
 * Options (all optional):
 * - source: directory under the public files directory that holds the files.
 * - target: directory under the public files directory to move the files to.
 * - limit: maximum number of files to move.
 * - list-info: print the query, the number of pending files and the location
 *   of the redirects file, then exit without moving anything.
 *
 * @return bool|null
 *   TRUE after printing the list-info report, FALSE when the redirects file
 *   cannot be opened for writing, nothing otherwise.
 */
function drush_migration_filepath() {
  // Source directory to be cleaned up. All images in it will be relocated.
  $source_directory = rtrim(drush_get_option('source', ''), '/');
  // Directory to place the new structure under.
  $target_directory = rtrim(drush_get_option('target', ''), '/');

  // Regular expression (evaluated by the database) matching image URIs located
  // directly in the source directory, e.g.
  // public://field/image/imagefield_hENvtS.png
  // For now public files only are assumed.
  $extensions = array('jpeg', 'jpg', 'gif', 'png');
  // Anything that does not contain "/".
  $file_pattern = "[^\/]*";
  // Append the trailing slash for the regular expression.
  // Note, in some instances drupal places under public:/// (three slashes)
  // even when no folder specified. Reason for this is not known yet.
  $source_pattern = $source_directory ? $source_directory . "\/" : '';
  // implode() must receive the separator first: the legacy (array, separator)
  // order is rejected by PHP 8.
  $regex = "^public:\/\/" . $source_pattern . "(" . $file_pattern . ")\.(" . implode('|', $extensions) . ")$";

  // Query the database for files that match this pattern.
  $filetypes = array('image/jpeg', 'image/jpg', 'image/gif', 'image/png');
  $query = db_select('file_managed', 'f')
    ->condition('filemime', $filetypes, 'IN')
    ->condition('uri', $regex, 'REGEXP')
    // Select the files to be moved.
    ->fields('f', array('fid', 'filename', 'uri', 'timestamp'))
    // Oldest first, so files added while the move is in progress are handled
    // last.
    ->orderBy('timestamp', 'asc');

  // The redirects file lives in the public files/ directory.
  $public_path = variable_get('file_public_path', conf_path() . '/files');
  $redirects_filename = $public_path . "/image-redirects.map";

  // Print helpful information on pending file moves and exit.
  if (drush_get_option('list-info')) {
    print $query;
    print_r($query->arguments());
    // Print total number of files that need moving.
    $num_files = $query->countQuery()->execute()->fetchField();
    drush_print(dt('There are !count files in total to be moved.', array('!count' => $num_files)));
    drush_print(dt('The redirects file will be created at: !file', array('!file' => $redirects_filename)));
    return TRUE;
  }

  // Limit the number of files, but only for a usable (positive) limit.
  $limit = (int) drush_get_option('limit');
  if ($limit > 0) {
    $query->range(0, $limit);
  }
  $files = $query->execute()->fetchAll();

  $total_files = count($files);
  drush_print(dt('!count files are set to be moved.', array('!count' => $total_files)));

  // Create or open the redirects file for appending. Report the failure
  // through Drush instead of killing the whole process.
  $redirects_file = fopen($redirects_filename, 'a');
  if ($redirects_file === FALSE) {
    return drush_set_error('MIGRATION_FILEPATH_REDIRECTS', dt('Unable to open redirects file for writing.'));
  }

  $count = 1;
  // Keep a list of files that have not been moved.
  $files_not_moved = array();

  foreach ($files as $file) {
    // Take the name from the URI: $file->filename can be the SAME for
    // different uri-s. The query only returns URIs of the form
    // <source>/<name>.<extension>, so the last path component is the name.
    // (Re-running $regex through PCRE with "/" delimiters broke as soon as
    // the source directory itself contained a "/".)
    $filename = drupal_basename($file->uri);

    $file_wrapper = file_stream_wrapper_get_instance_by_uri($file->uri);
    // If the file has already been moved, or does not exist in the
    // filesystem, move on.
    if (FALSE === $file_wrapper->url_stat($file->uri, STREAM_URL_STAT_QUIET)) {
      $files_not_moved[] = $file->uri;
      drush_log("File entry in the database does not exist on the filesystem for " . $file->uri, 'notice');
      continue;
    }

    // Each file should go to the directory based on its fid.
    $target_directory_for_file = $target_directory . '/' . generate_directory_name($file);
    // Construct a URI for the directory so the stream wrappers can be used.
    $new_directory = file_build_uri($target_directory_for_file);
    $directory_wrapper = file_stream_wrapper_get_instance_by_uri($new_directory);
    // Make sure that the new directory exists, creating parents as needed.
    $directory_wrapper->mkdir($new_directory, 0777, STREAM_MKDIR_RECURSIVE);

    // Move the file to its new location.
    $new_uri = file_build_uri($target_directory_for_file . '/' . $filename);
    if (!$file_wrapper->rename($file->uri, $new_uri)) {
      $files_not_moved[] = $file->uri;
      drush_log("There was a problem moving " . $file->uri, 'notice');
      continue;
    }

    $progress = round(($count / $total_files) * 100);
    drush_print($progress . '%');

    // Point the database record at the new location.
    db_update('file_managed')
      ->fields(array('uri' => $new_uri))
      ->condition('fid', $file->fid)
      ->execute();

    // Append to the image redirects list.
    $rel_old_url = parse_url(file_create_url($file->uri), PHP_URL_PATH);
    $rel_new_url = parse_url(file_create_url($new_uri), PHP_URL_PATH);
    fwrite($redirects_file, '~^' . $rel_old_url . ' ' . $rel_new_url . ';' . PHP_EOL);
    $count++;
  }
  fclose($redirects_file);

  $not_moved = count($files_not_moved);
  drush_print(dt('(!moved/!total) files moved.', array(
    '!moved' => $total_files - $not_moved,
    '!total' => $total_files,
  )));
  if ($not_moved) {
    drush_print(dt('!count files do not exist in the filesystem.', array('!count' => $not_moved)));
  }
  drush_print(dt('Redirects file: !file', array('!file' => $redirects_filename)));
}
/**
 * Builds the relative directory path for a file from its file id.
 *
 * The fid is left-padded with zeros to at least 10 characters. The third
 * character from the end becomes the top-level directory and the last two
 * characters become the sub-directory, e.g. fid 12345 gives "3/45".
 *
 * @param object $file
 *   An object exposing a "fid" property.
 *
 * @return string
 *   The directory path in the form "<top>/<sub>".
 */
function generate_directory_name($file) {
  $padded_fid = str_pad($file->fid, 10, '0', STR_PAD_LEFT);
  // Negative offsets count from the end of the padded id.
  return substr($padded_fid, -3, 1) . '/' . substr($padded_fid, -2);
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment