Taxonomy term shuffles: run hook_update_N with the batch API in Drupal 7
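The update hook below uses Drupal 7's batched-update pattern: a hook_update_N() implementation that accepts a $sandbox argument is re-invoked by the update system until $sandbox['#finished'] reaches 1, and $sandbox persists between calls. A minimal sketch of that skeleton (the function name and items are illustrative):

function my_module_update_7000(&$sandbox) {
  // First pass: stash the work list and counters in the sandbox.
  if (!isset($sandbox['processed'])) {
    $sandbox['items'] = array(10, 20, 30);
    $sandbox['total'] = count($sandbox['items']);
    $sandbox['processed'] = 0;
  }
  // Every pass: process one item off the list.
  $item = array_shift($sandbox['items']);
  // ... do the work for $item here ...
  $sandbox['processed']++;
  // Any value below 1 tells the update system to call us again.
  $sandbox['#finished'] = $sandbox['processed'] / $sandbox['total'];
}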
/**
 * Implements hook_update_N().
 *
 * Re-assigns nodes from one set of taxonomy terms to a new set,
 * redirects the old term pages to the new term pages, outputs the
 * updated nodes and tids to CSV, and deletes the old taxonomy terms.
 */
function my_module_update_7001(&$sandbox) {
  // Set up the initial processing.
  if (!isset($sandbox['processed'])) {
    // Set vars from the CSV. '{csv_variable_name}' is a placeholder for the
    // variable populated by the CSV import module below.
    $csv_rows = variable_get('{csv_variable_name}', array());
    // Cut out if there are no update vars.
    if (empty($csv_rows)) {
      return;
    }
    $tids_old = $tids_new = array();
    foreach ($csv_rows as $row) {
      $tids_old[] = $row[0];
      $tids_new[] = $row[1];
    }
    // Cut out if there are no update vars.
    if (empty($tids_old) || empty($tids_new)) {
      return;
    }
    $nids_all = array();
    foreach ($tids_old as $tid_old) {
      // Get all the nids that have this term attached.
      $these_nids = taxonomy_select_nodes($tid_old, FALSE, FALSE, array('t.nid' => 'ASC'));
      // Merge the attached nids into the master array of nids.
      if (!empty($these_nids)) {
        $nids_all = array_merge($these_nids, $nids_all);
      }
    }
    // Clean up the nids_all array - sort, remove duplicates.
    sort($nids_all);
    $nids_unique = array_unique($nids_all);
    // Save initial sandbox variables.
    $sandbox['processed'] = 0;
    $sandbox['total'] = count($nids_unique);
    $sandbox['csv_nids'] = array();
    $sandbox['csv_tids'] = array();
    $sandbox['nids_delete'] = array();
    $sandbox['nids'] = $nids_unique;
    $sandbox['tids_old'] = $tids_old;
    $sandbox['tids_new'] = $tids_new;
  }
  // Process up to 5 nids per pass through this function.
  $items_per_loop = 5;
  // Shrink the count on the final pass so we don't overrun the list.
  $items_per_loop = ($sandbox['total'] - $sandbox['processed'] < $items_per_loop) ? $sandbox['total'] - $sandbox['processed'] : $items_per_loop;
  for ($i = 0; $i < $items_per_loop; $i++) {
    // Take the next nid off the list and load it.
    $this_nid = array_shift($sandbox['nids']);
    $this_node = node_load($this_nid);
    // Wrap the node if it exists.
    if (!empty($this_node)) {
      $node_wrapper = entity_metadata_wrapper('node', $this_node);
      // Get the term object values of this node's field_categories.
      $node_terms = $node_wrapper->field_categories->value();
      // Get the raw tids of this node's field_categories.
      $node_tids_raw = $node_wrapper->field_categories->raw();
      // Create arrays to store the current and updated tids for this node.
      $node_tids = $node_tids_update = array();
      if (!empty($node_tids_raw)) {
        // Make sure $node_tids is an array.
        $node_tids = is_array($node_tids_raw) ? $node_tids_raw : array($node_tids_raw);
        $node_tids_update = $node_tids;
      }
      // Loop through the node's field_categories tids.
      foreach ($node_tids as $delta => $tid) {
        // Check if each tid is a valid term object.
        if (!empty($node_terms[$delta])) {
          // Check if this tid should be deleted.
          if (in_array($tid, $sandbox['tids_old'])) {
            // Remove the to-be-deleted tid from the node's updated
            // field_categories array.
            $node_tids_update = array_diff($node_tids_update, array($tid));
            // Grab the corresponding key from the to-be-deleted terms array.
            $key = array_search($tid, $sandbox['tids_old']);
            // If the new tid is not already in the node's categories, add it.
            if (!in_array($sandbox['tids_new'][$key], $node_tids_update)) {
              $node_tids_update[] = $sandbox['tids_new'][$key];
            }
          }
        }
        else {
          // Remove cruft - orphaned term ids that no longer map to a valid
          // term object.
          $node_tids_update = array_diff($node_tids_update, array($tid));
        }
      }
      // Re-index the updated field_categories array numerically.
      $node_tids_update = array_values($node_tids_update);
      // Save the updated field_categories array to the node.
      $node_wrapper->field_categories->set($node_tids_update);
      $node_wrapper->save();
      // Save the nid data for output to CSV.
      $sandbox['csv_nids'][] = array(
        $this_nid,
        drupal_get_path_alias('node/' . $this_nid),
        '"' . implode(" / ", $node_tids) . '"',
        '"' . implode(" / ", $node_tids_update) . '"',
      );
    }
    else {
      // The nid did not load as a node; queue it for deletion from the
      // taxonomy_index table later.
      $sandbox['nids_delete'][] = $this_nid;
      // Save the nid data for output to CSV.
      $sandbox['csv_nids'][] = array(
        $this_nid,
        'not a valid node',
        'N/A',
        'N/A',
      );
    }
    // Rinse. Repeat.
    $sandbox['processed']++;
  }
  // Update batch status.
  $sandbox['#finished'] = ($sandbox['processed'] >= $sandbox['total']) ? 1 : ($sandbox['processed'] / $sandbox['total']);
  // Once all batches are done, delete the old terms, add redirects, and
  // set up data for CSV output.
  if ($sandbox['#finished'] >= 1) {
    // Redirect each to-be-deleted term's pages to its reassigned term page.
    foreach ($sandbox['tids_old'] as $index => $tid_old) {
      $old_term_path = 'taxonomy/term/' . $tid_old;
      $old_term_alias = drupal_get_path_alias($old_term_path);
      $new_tid_path = 'taxonomy/term/' . $sandbox['tids_new'][$index];
      // Use placeholders rather than string concatenation to avoid SQL
      // injection.
      $redirects = db_query("SELECT rid, source, redirect FROM {redirect} WHERE source = :source AND redirect = :redirect", array(
        ':source' => $old_term_alias,
        ':redirect' => $new_tid_path,
      ))->fetchAll();
      // Delete the old term before adding redirects from its path and
      // alias, so the alias is free for the redirect module to claim.
      taxonomy_term_delete($tid_old);
      if (empty($redirects)) {
        // Create a new redirect from the deleted term page's alias to the
        // reassigned term page if one doesn't already exist.
        $new_redirect_from_alias = new stdClass();
        redirect_object_prepare($new_redirect_from_alias, array('source' => $old_term_alias, 'redirect' => $new_tid_path));
        redirect_save($new_redirect_from_alias);
        // Create a new redirect from the deleted term's Drupal path to the
        // reassigned term page's Drupal path.
        $new_redirect_from_tid = new stdClass();
        redirect_object_prepare($new_redirect_from_tid, array('source' => $old_term_path, 'redirect' => $new_tid_path));
        redirect_save($new_redirect_from_tid);
        // Write tid updates to CSV.
        $tid_new_related_nids = taxonomy_select_nodes($sandbox['tids_new'][$index], FALSE, FALSE, array('t.nid' => 'ASC'));
        $sandbox['csv_tids'][] = array(
          'tid_delete' => $tid_old,
          'tid_delete_path' => $old_term_path,
          'tid_delete_alias' => $old_term_alias,
          'status' => 'new',
          'source_drupal_path' => $old_term_path,
          'source_alias' => $old_term_alias,
          'redirect' => $new_tid_path,
          'tid_new_alias' => drupal_get_path_alias($new_tid_path),
          'tid_new_related_nids' => '"' . implode(" | ", $tid_new_related_nids) . '"',
        );
      }
    }
    // Prepare headers for the CSV files; the keys mirror the row arrays.
    $csv_tid_headers = array(
      'tid_delete' => 'tid_delete',
      'tid_delete_path' => 'tid_delete_path',
      'tid_delete_alias' => 'tid_delete_alias',
      'status' => 'status',
      'source_drupal_path' => 'source_drupal_path',
      'source_alias' => 'source_alias',
      'redirect' => 'redirect',
      'tid_new_alias' => 'tid_new_alias',
      'tid_new_related_nids' => 'tid_new_related_nids',
    );
    $csv_nid_headers = array(
      'nid' => 'nid',
      'path' => 'path',
      'tids_previous' => 'tids_previous',
      'tids_updated' => 'tids_updated',
    );
    // Output the updated tids/nids to CSV files.
    _my_module_create_csv($sandbox['csv_tids'], $csv_tid_headers, 'tids-to-delete');
    _my_module_create_csv($sandbox['csv_nids'], $csv_nid_headers, 'nids-updated');
    // Delete orphaned node ids from the taxonomy_index table.
    if (!empty($sandbox['nids_delete'])) {
      db_delete('taxonomy_index')
        ->condition('nid', $sandbox['nids_delete'], 'IN')
        ->execute();
    }
  }
  // Return progress status (guarding against division by zero when there
  // were no nodes to process).
  $args = array(
    '!done' => $sandbox['processed'],
    '!total' => $sandbox['total'],
    '!pct' => $sandbox['total'] ? round(($sandbox['processed'] / $sandbox['total']) * 100, 2) : 100,
  );
  return t('Completed !done/!total (!pct%)', $args);
}
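Trigger the update from the command line (or by visiting /update.php):

drush updatedb -y

The update system keeps re-invoking my_module_update_7001(), five nodes per pass, until it reports itself finished. The companion module below supplies the input: it uploads a CSV of old-tid/new-tid pairs and saves the parsed rows into the Drupal variable that the update hook reads in place of the '{csv_variable_name}' placeholder.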
<?php

/**
 * @file
 * Upload CSVs.
 */

/**
 * Implements hook_menu().
 */
function my_module_csv_menu() {
  $items['my_module_csv'] = array(
    'title' => 'Import Data from CSV File',
    'description' => 'Import content from a <abbr title="Comma Separated Values">CSV</abbr> file.',
    'access callback' => 'user_access',
    'access arguments' => array('administer site configuration'),
    'page callback' => 'my_module_csv_pagecallback',
    'type' => MENU_NORMAL_ITEM,
  );
  return $items;
}
/**
 * Page callback for the menu item.
 */
function my_module_csv_pagecallback() {
  $form = drupal_get_form('my_module_csv_form');
  $output = '<p>This tool will attempt to import CSV data.</p>';
  $output .= drupal_render($form);
  return $output;
}
/**
 * Build form.
 */
function my_module_csv_form() {
  $form['#attributes'] = array(
    'enctype' => 'multipart/form-data',
  );
  $form['csvfile'] = array(
    '#title' => t('CSV File'),
    '#type' => 'file',
    '#description' => ($max_size = parse_size(ini_get('upload_max_filesize'))) ? t('Due to server restrictions, the <strong>maximum upload file size is !max_size</strong>. Files that exceed this size will be disregarded.', array('!max_size' => format_size($max_size))) : '',
  );
  $form['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Commence Import'),
  );
  $form['#validate'] = array(
    'my_module_csv_validate_fileupload',
    'my_module_csv_form_validate',
  );
  return $form;
}
/**
 * Validate CSV file upload.
 */
function my_module_csv_validate_fileupload(&$form, &$form_state) {
  $validators = array(
    'file_validate_extensions' => array('csv'),
  );
  $csv_dir = 'public://csv';
  if (file_prepare_directory($csv_dir, FILE_CREATE_DIRECTORY)) {
    if ($file = file_save_upload('csvfile', $validators, $csv_dir, FILE_EXISTS_REPLACE)) {
      $form_state['values']['csvupload'] = $file->destination;
    }
    else {
      form_set_error('my_module_csv', t('Unable to copy upload file to !dest', array('!dest' => $csv_dir)));
    }
  }
}
/**
 * Validate CSV file.
 */
function my_module_csv_form_validate(&$form, &$form_state) {
  if (isset($form_state['values']['csvupload'])) {
    if ($handle = fopen($form_state['values']['csvupload'], 'r')) {
      if (!($line = fgetcsv($handle, 4096))) {
        form_set_error('csvfile', t('Something went wrong. Could not read CSV file.'));
      }
      fclose($handle);
    }
    else {
      form_set_error('csvfile', t('Unable to read uploaded file !filepath', array('!filepath' => $form_state['values']['csvupload'])));
    }
  }
}
/**
 * Submit handler: queues one batch operation per CSV row.
 */
function my_module_csv_form_submit(&$form, &$form_state) {
  $batch = array(
    'title' => t('Importing CSV ...'),
    'operations' => array(),
    'init_message' => t('Commencing'),
    'progress_message' => t('Processed @current out of @total.'),
    'error_message' => t('An error occurred during processing'),
    // This must match the finished callback defined below.
    'finished' => 'my_module_csv_import_finished',
  );
  if (isset($form_state['values']['csvupload'])) {
    if ($handle = fopen($form_state['values']['csvupload'], 'r')) {
      $batch['operations'][] = array('_my_module_csv_remember_filename', array($form_state['values']['csvupload']));
      while ($line = fgetcsv($handle, 4096)) {
        // base64_encode is used to ensure we don't overload the batch
        // processor by stuffing complex objects into it.
        $batch['operations'][] = array(
          '_my_module_csv_import_line',
          array(array_map('base64_encode', $line)),
        );
      }
      fclose($handle);
    }
  }
  batch_set($batch);
}
/**
 * Batch API finished callback.
 */
function my_module_csv_import_finished($success, $results, $operations) {
  if (!empty($results['failed_rows'])) {
    $dir = 'public://my_module_csv';
    $csv_filename = 'failed_rows-' . basename($results['uploaded_filename']);
    $csv_filepath = $dir . '/' . $csv_filename;
    // Build the message arguments up front so every branch below can use
    // them.
    $targs = array(
      '!csv_url' => l(check_plain($csv_filename), file_create_url($csv_filepath)),
      '%csv_filename' => $csv_filename,
      '%csv_filepath' => $csv_filepath,
      '%csv_directory' => $dir,
    );
    if (file_prepare_directory($dir, FILE_CREATE_DIRECTORY)) {
      if ($handle = fopen($csv_filepath, 'w+')) {
        foreach ($results['failed_rows'] as $failed_row) {
          fputcsv($handle, $failed_row);
        }
        fclose($handle);
        drupal_set_message(t('Some rows failed to import. You may download a CSV of these rows: !csv_url', $targs), 'error');
      }
      else {
        drupal_set_message(t('Some rows failed to import, but unable to write error CSV to %csv_filepath', $targs), 'error');
      }
    }
    else {
      drupal_set_message(t('Some rows failed to import, but unable to create directory for error CSV at %csv_directory', $targs), 'error');
    }
  }
  return t('The CSV import has completed.');
}
/**
 * Batch API helper function.
 */
function _my_module_csv_remember_filename($filename, &$context) {
  $context['results']['uploaded_filename'] = $filename;
}
/**
 * Common batch processing callback for all operations.
 *
 * Appends each row to a Drupal variable named after the uploaded file, so
 * the data can be consumed later (e.g. by the update hook above).
 */
function _my_module_csv_import_line($line, &$context) {
  // Get the filename of the uploaded CSV without the extension.
  $csv_filename = pathinfo($context['results']['uploaded_filename'], PATHINFO_FILENAME);
  // Get the Drupal variable the CSV rows are saved to.
  $csv_variable_name = 'my_module_' . $csv_filename;
  $csv_variable = variable_get($csv_variable_name, array());
  if (!isset($context['results']['rows_imported'])) {
    $context['results']['rows_imported'] = 0;
  }
  $context['results']['rows_imported']++;
  $line = $cleaned_line = array_map('base64_decode', $line);
  // Give feedback to the importer about which operation is being performed.
  // Show the row count by default.
  $context['message'] = t('Importing row !c', array('!c' => $context['results']['rows_imported']));
  // Provide some feedback about the row currently being processed.
  $context['message'] = t('Importing %first', array('%first' => $line[0]));
  // Capture and report on failed lines.
  if (isset($line[1], $line[2]) && $line[1] == 'ROW' && $line[2] == 'FAILS') {
    $context['results']['failed_rows'][] = $line;
  }
  // Loop through the data fields in this line and keep non-empty values.
  $line_array = array();
  foreach ($line as $field) {
    if (!empty($field)) {
      $line_array[] = $field;
    }
  }
  // Append the row to the variable and save it.
  $csv_variable[] = $line_array;
  $context['results']['variable'][] = $line_array;
  variable_set($csv_variable_name, $csv_variable);
  // Output a message during the batch process.
  drupal_set_message(t('Parsed line @first', array('@first' => $line[0])));
}
/**
 * Creates a CSV file and streams it as a download.
 *
 * @param array $data
 *   The array of rows to export.
 * @param array $headers
 *   The header row, keyed to match the data rows.
 * @param string $title
 *   A short label used in the file name.
 */
function _my_module_create_csv(array $data, array $headers, $title) {
  $fh = fopen('php://temp', 'rw');
  fputcsv($fh, $headers);
  foreach ($data as $row) {
    fputcsv($fh, $row);
  }
  rewind($fh);
  $data = stream_get_contents($fh);
  fclose($fh);
  // Strip the quoting fputcsv added; the rows above supply their own
  // quoting where needed.
  $data = str_replace('"', '', $data);
  // Get the temporary directory.
  $temporary_directory = file_directory_temp();
  // Build the destination path in the temporary directory.
  $destination = $temporary_directory . '/' . $title . '-export-' . time() . '.csv';
  // Save the file.
  $filename = file_unmanaged_save_data($data, $destination, FILE_EXISTS_REPLACE);
  // The downloaded file name.
  $download_name = $title . '-export-' . date('Y-m-d') . '.csv';
  drupal_add_http_header('Content-Type', 'text/csv; charset=utf-8');
  drupal_add_http_header('Content-Disposition', 'attachment; filename=' . $download_name, TRUE);
  readfile($filename);
}
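For the taxonomy update above, each row of the uploaded CSV is expected to carry the old tid in the first column and its replacement tid in the second, for example (illustrative tids):

371,512
372,512
380,519

The importer saves these rows into the variable my_module_ followed by the uploaded file's base name, which is the name to substitute for the '{csv_variable_name}' placeholder in my_module_update_7001().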
Thanks for sharing this inspiring solution. If there are a lot of nodes, you may want to save only the field value rather than the complete node, since updating hundreds of thousands of nodes can take a while. It would look something like this:
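(A minimal sketch of that idea, using the wrapper and variables from the update hook above; note that field_attach_update() bypasses node_save(), so hooks that react to node updates will not fire.)

// Write only field_categories instead of running a full node_save().
$node_wrapper->field_categories->set($node_tids_update);
field_attach_update('node', $node_wrapper->value());
// Reset the entity cache so subsequent loads see the new values.
entity_get_controller('node')->resetCache(array($this_nid));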
And if that still takes too long, as it did in my case, you should really try parallelizing it with the great tutorial/code from Multi Processing Part 3: Jumping the Drupal Queue; see also: https://github.com/johnennewdeeson/drush-multi-processing
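The rough shape of that approach - splitting the nid list into ranges and running one drush process per range - might look like this purely hypothetical sketch (update_range.php is an imagined script that updates the nodes whose nids fall between its two arguments; the linked tutorial manages the worker processes properly via the queue):

# Two slices of the node list, processed by parallel background processes.
drush php-script update_range.php 1 50000 &
drush php-script update_range.php 50001 100000 &
wait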