Skip to content

Instantly share code, notes, and snippets.

@adamzimmermann
Created August 15, 2016 19:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamzimmermann/79c43f662e008e930faf8df57808bbbd to your computer and use it in GitHub Desktop.
Save adamzimmermann/79c43f662e008e930faf8df57808bbbd to your computer and use it in GitHub Desktop.
Drupal Migrate class for using images as a migration source.
<?php
/**
* Source Migration class for finding image tags in markup.
*/
class ExampleMigrateSourceImage extends MigrateSource {

  /**
   * The concatenated HTML content that is scanned in a single pass.
   *
   * @var string
   */
  private $content = '';

  /**
   * Per-record chunks of content for parsing that must be done piecemeal.
   *
   * Each entry is an array with the keys 'title', 'content' and 'id'.
   *
   * @var array
   */
  private $contentArray = array();

  /**
   * The image records found so far, keyed by their unique identifier.
   *
   * @var array
   */
  private $matches = array();

  /**
   * The zero-based position of the next match to hand out as a row.
   *
   * @var int
   */
  private $matchesCurrent = 0;

  /**
   * Indicates if the content has been queried and parsed.
   *
   * @var bool
   */
  private $contentImported = FALSE;

  /**
   * {@inheritdoc}
   */
  public function __construct($options = array()) {
    parent::__construct($options);
  }

  /**
   * {@inheritdoc}
   */
  public function __toString() {
    return t('Create and download images referenced from a string of markup.');
  }

  /**
   * Return the number of available source records.
   *
   * Triggers the (one-time) content import so the count reflects the parsed
   * matches.
   *
   * @return int
   *   The number of image records found.
   */
  public function computeCount() {
    $this->importContent();
    return is_array($this->matches) ? count($this->matches) : 0;
  }

  /**
   * Returns a list of fields available to be mapped from the source.
   *
   * @return array
   *   Translated field descriptions keyed by source field name.
   */
  public function fields() {
    return array(
      'alt' => t('Alt text'),
      'title' => t('Title text'),
      'url' => t('URL'),
      'credit' => t('Credit'),
      'filename' => t('Filename'),
      'node_title' => t('Title'),
    );
  }

  /**
   * Do whatever needs to be done to start a fresh traversal of the source data.
   *
   * Only the row pointer is reset; the parsed matches are kept, so the source
   * content is not queried or parsed again.
   */
  public function performRewind() {
    $this->matchesCurrent = 0;
  }

  /**
   * Fetch the next row of data, returning it as an object.
   *
   * @return object|false
   *   A stdClass carrying every key of the current match as a property, or
   *   FALSE when there is no more data available.
   */
  public function getNextRow() {
    $this->importContent();

    if ($this->matchesCurrent >= $this->computeCount()) {
      return FALSE;
    }

    // The matches are keyed by image identifier rather than by position, so
    // slice out the entry at the current offset. The slice is stored in a
    // variable first because reset() takes its argument by reference.
    $slice = array_slice($this->matches, $this->matchesCurrent, 1);
    $match = reset($slice);

    // Add all of the values collected by the image finding methods.
    $row = new stdClass();
    foreach ($match as $key => $value) {
      $row->{$key} = $value;
    }

    // Advance to the next match for the following call.
    $this->matchesCurrent++;

    return $row;
  }

  /**
   * Find and parse the source data if it hasn't already been done.
   */
  private function importContent() {
    if ($this->contentImported) {
      return;
    }

    // Build the content string to parse for images.
    $this->buildContent();
    // Find the images in the string and populate the matches array.
    $this->findImages();
    // Note that the import has been completed and does not need to be
    // performed again.
    $this->contentImported = TRUE;
  }

  /**
   * Get all of the HTML that needs to be filtered for image tags and tokens.
   *
   * Populates both the single content string and the per-record content
   * chunks from the rows returned by contentQuery().
   */
  private function buildContent() {
    $query = $this->contentQuery();
    $content = $query->execute()->fetchAll();

    if (empty($content)) {
      return;
    }

    foreach ($content as $item) {
      // This builds one long string for parsing that can be done on long
      // strings without using too much memory. Here, we add fields 'foo' and
      // 'bar' from the query.
      $this->content .= $item->foo;
      $this->content .= $item->bar;

      // This builds an array of content for parsing operations that need to be
      // performed on smaller chunks of the source data to avoid memory issues.
      // This is only required if you run into parsing issues, otherwise it
      // can be removed. One chunk is recorded per row. The example query does
      // not select these columns, so they are read defensively; add them to
      // contentQuery() if chunked parsing is needed.
      $this->contentArray[] = array(
        'title' => isset($item->post_title) ? $item->post_title : NULL,
        'content' => isset($item->post_content) ? $item->post_content : NULL,
        'id' => isset($item->id) ? $item->id : NULL,
      );
    }
  }

  /**
   * Creates the query that will be used to build the content string.
   *
   * @return object
   *   A SelectQuery object.
   */
  protected function contentQuery() {
    $query = Database::getConnection('default', 'example')->select('table', 't');
    $query->fields('t', array(
      'foo',
      'bar',
    ));
    // Skip rows where either field is an empty string.
    $query->condition('t.foo', '', '!=');
    $query->condition('t.bar', '', '!=');
    return $query;
  }

  /**
   * Finds the desired elements in the markup.
   *
   * @return false|null
   *   FALSE when there is no content to parse (a watchdog entry is logged),
   *   otherwise nothing.
   */
  private function findImages() {
    // Verify that content was found.
    if (empty($this->content)) {
      $message = 'No HTML content with image tags to download could be found.';
      watchdog('example_migrate', $message);
      return FALSE;
    }

    // Find images where the entire source content string can be parsed at once.
    $this->findImageMethodOne();

    // Find images where the source content must be parsed in chunks.
    foreach ($this->contentArray as $post) {
      $this->findImageMethodTwo($post);
    }
  }

  /**
   * This is an example of an image finding method.
   *
   * It is a template: the regular expression, the identifier and the values
   * stored for the match are placeholders to be replaced with real parsing
   * logic.
   */
  private function findImageMethodOne() {
    // Create a regex to look through the content. Placeholder pattern; replace
    // it with a real expression.
    $matches = array();
    $regex = '/regex/to/find/images/';
    preg_match_all($regex, $this->content, $matches, PREG_SET_ORDER);

    // Set a unique row identifier from some captured pattern of the regex-
    // this would likely be the full path to the image. You might need to
    // perform cleanup on this value to standardize it, as the path
    // to /foo/bar/image.jpg, example.com/foo/bar/image.jpg, and
    // http://example.com/foo/bar/image.jpg should not create three unique
    // source records. Standardizing the URL is key for not just avoiding
    // creating duplicate source records, but the URL is also the ID value you
    // will use in your destination class mapping callback that looks up the
    // resulting image entity ID from the data it finds in the body field.
    $id = 'http://example.com/foo/bar/image.jpg';

    // Placeholder values. They are initialised to NULL so the template does
    // not read undefined variables; derive the real values from $matches.
    $src = NULL;
    $alttext = NULL;
    $description = NULL;
    $credit = NULL;
    $filename = NULL;
    $custom_thing = NULL;

    // Add to the list of matches after performing more custom logic to
    // find all of the correct chunks of data we need. Be sure to set
    // every value here that you will need when constructing your entity later.
    $this->matches[$id] = array(
      'url' => $src,
      'alt' => $alttext,
      'title' => $description,
      'credit' => $credit,
      'id' => $id,
      'filename' => $filename,
      'custom_thing' => $custom_thing,
    );
  }

  /**
   * This is another example of an image finding method.
   *
   * @param array $post
   *   One content chunk as built by buildContent(), with the keys 'title',
   *   'content' and 'id'.
   */
  private function findImageMethodTwo($post = array()) {
    // Some DOM library parsing code could live here. Then the images that were
    // found would be added to $this->matches just like they are added in the
    // findImageMethodOne() method.
  }

}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment