Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save levonlee/eb8618615552180588af37a2821ccc84 to your computer and use it in GitHub Desktop.
Save levonlee/eb8618615552180588af37a2821ccc84 to your computer and use it in GitHub Desktop.
Extend Drupal drush_feeds_import module to schedule large feeds import jobs using shell script
<?php
/**
* @file
* Drush integration for the feeds module.
*/
/**
 * Implements hook_drush_command().
 *
 * Declares the drush commands provided by this command file:
 * feeds-import, feeds-import-all, feeds-list, feeds-state, feeds-log and
 * feeds-unlock.
 *
 * @return array
 *   Command definitions keyed by command name.
 */
function drush_feeds_import_drush_command() {
  return array(
    // Import one feed, selected with --feed-id.
    'feeds-import' => array(
      'description' => dt('Enable to run a specific feed import'),
      'options' => array(
        'feed-id' => array(
          'description' => dt('Feed ID to import.'),
        ),
      ),
      'examples' => array(
        'drush feeds-import --feed-id=feed_importer_1' => "Define the feed-id to process in background.\nIf feed is not configured to run in background will be skipped.",
      ),
    ),
    // Import every feed.
    'feeds-import-all' => array(
      'description' => dt('Import all feeds available'),
    ),
    // List the feeds.
    'feeds-list' => array(
      'description' => dt('Get a list of all feeds available'),
    ),
    // Report the import progress of one feed.
    'feeds-state' => array(
      'description' => dt('Get import progress of a feed'),
      'options' => array(
        'feed-id' => array(
          'description' => dt('Feed ID to check import progress status.'),
        ),
      ),
      'examples' => array(
        'drush feeds-state --feed-id=feed_importer_1' => "Define the feed-id to get import progress status",
      ),
    ),
    // Write a message to the log.
    'feeds-log' => array(
      'description' => dt('Use Drush to log Feeds'),
      'options' => array(
        'm' => array(
          'description' => dt('Message'),
        ),
      ),
      'examples' => array(
        'drush feeds-log --m="some message"' => "Log to Recent Log message",
      ),
    ),
    // Release the lock on one feed.
    'feeds-unlock' => array(
      'description' => dt('Unlock a feed'),
      'options' => array(
        'feed-id' => array(
          'description' => dt('Feed ID to unlock.'),
        ),
      ),
      'examples' => array(
        'drush feeds-unlock --feed-id=feed_importer_1' => "Define the feed-id to unlock",
      ),
    ),
  );
}
/**
 * Imports an individual feed.
 *
 * Looks the feed up in {feeds_source} and starts its import, but only when
 * the feed's importer has 'process_in_background' enabled. Problems are
 * reported through drush_log() with type 'error'.
 *
 * @param string $id
 *   The feed (importer) ID as stored in {feeds_source}.id.
 */
function _feed_import($id) {
  $feed = db_query("SELECT feed_nid FROM {feeds_source} WHERE id = :id", array(':id' => $id))->fetchObject();

  // fetchObject() returns FALSE when no row matches. Test the row itself:
  // reading ->feed_nid from FALSE yields NULL, and NULL == 0 is TRUE, which
  // made the invalid-ID branch unreachable.
  if (!$feed) {
    drush_log(dt('Invalid Feed ID: @id', array('@id' => $id)), 'error');
    return;
  }

  $feed_obj = feeds_source($id, $feed->feed_nid);
  $feed_config = $feed_obj->importer->getConfig();
  if ($feed_config['process_in_background']) {
    $feed_obj->startImport();
  }
  else {
    drush_log(dt('Feed @id is not enabled to Run in Background', array('@id' => $id)), 'error');
  }
}
/**
 * Command callback for feeds-import-all.
 *
 * Runs _feed_import() once for every row found in {feeds_source}.
 */
function drush_drush_feeds_import_feeds_import_all() {
  $sources = db_query("SELECT * FROM {feeds_source}");
  foreach ($sources as $source) {
    $feed_id = $source->id;
    _feed_import($feed_id);
  }
}
/**
 * Command callback for feeds-import.
 *
 * Reads the feed ID from the --feed-id option and imports that feed. Logs an
 * error when the option is missing.
 */
function drush_drush_feeds_import_feeds_import() {
  $id = drush_get_option('feed-id');
  if (!$id) {
    // The ID is only read from --feed-id, so the usage hint must show that
    // option rather than a positional argument.
    drush_log(dt('Feed ID is required. Usage: drush feeds-import --feed-id=ID.'), 'error');
    return;
  }
  _feed_import($id);
}
/**
 * Command callback for feeds-list.
 *
 * Prints a table with one row per {feeds_source} record showing the feed ID,
 * the feed node ID, and whether the importer has 'process_in_background'
 * enabled.
 */
function drush_drush_feeds_import_feeds_list() {
  $table = array();
  // Header row.
  $table[] = array(dt('ID'), dt('Feed NID'), dt('Process in Background'));

  foreach (db_query("SELECT * FROM {feeds_source}") as $source) {
    $config = feeds_source($source->id, $source->feed_nid)->importer->getConfig();
    if ($config['process_in_background']) {
      $background = 'TRUE';
    }
    else {
      $background = 'FALSE';
    }
    $table[] = array($source->id, $source->feed_nid, $background);
  }

  drush_print_table($table, TRUE);
}
/**
 * Command callback for feeds-state.
 *
 * Prints the import progress of the feed given with --feed-id: either
 * "Import complete" or "Import not complete: N%". Logs an error when the
 * option is missing or the ID has no row in {feeds_source}.
 */
function drush_drush_feeds_import_feeds_state() {
  $id = drush_get_option('feed-id');
  if (!$id) {
    // Name this command and the option it actually reads.
    drush_log(dt('Feed ID is required. Usage: drush feeds-state --feed-id=ID.'), 'error');
    return;
  }

  $feed = db_query("SELECT feed_nid FROM {feeds_source} WHERE id = :id", array(':id' => $id))->fetchObject();
  // fetchObject() returns FALSE when no row matches. Test the row itself:
  // reading ->feed_nid from FALSE yields NULL, and NULL == 0 is TRUE, which
  // made the invalid-ID branch unreachable.
  if (!$feed) {
    drush_log(dt('Invalid Feed ID: @id', array('@id' => $id)), 'error');
    return;
  }

  $feed_obj = feeds_source($id, $feed->feed_nid);
  $progress = $feed_obj->progressImporting();
  if ($progress != FEEDS_BATCH_COMPLETE) {
    // Progress is treated as a fraction and shown as a whole percentage.
    drush_print(t('Import not complete: @p%', array('@p' => round((float) $progress * 100))));
  }
  else {
    drush_print(t('Import complete'));
  }
}
/**
 * Command callback for feeds-log.
 *
 * Writes the text given with --m to the log under the 'Feeds' type. Does
 * nothing when the option is missing or empty.
 */
function drush_drush_feeds_import_feeds_log() {
  $m = drush_get_option('m');
  if ($m) {
    // watchdog() expects the untranslated message plus its placeholder
    // values, so the text is passed as a variable instead of being run
    // through t() first.
    watchdog('Feeds', '@m', array('@m' => $m));
  }
}
/**
 * Command callback for feeds-unlock: unlocks a feed if it is locked.
 *
 * A feed counts as locked when its "feeds_source_{id}_0" lock is held, or
 * when its import or clear progress is not FEEDS_BATCH_COMPLETE. Unlocking
 * resets the 'state' column of the feed's {feeds_source} row and deletes the
 * lock row from {semaphore}. Logs an error when --feed-id is missing or has
 * no row in {feeds_source}.
 */
function drush_drush_feeds_import_feeds_unlock() {
  $id = drush_get_option('feed-id');
  if (!$id) {
    // The ID is only read from --feed-id, so show that option.
    drush_log(dt('Feed ID is required. Usage: drush feeds-unlock --feed-id=ID.'), 'error');
    return;
  }

  $feed = db_query("SELECT feed_nid FROM {feeds_source} WHERE id = :id", array(':id' => $id))->fetchObject();
  // fetchObject() returns FALSE when no row matches. Test the row itself:
  // reading ->feed_nid from FALSE yields NULL, and NULL == 0 is TRUE, which
  // made the invalid-ID branch unreachable.
  if (!$feed) {
    drush_log(dt('Invalid Feed ID: @id', array('@id' => $id)), 'error');
    return;
  }

  $feed_obj = feeds_source($id, $feed->feed_nid);
  $locked = !lock_may_be_available("feeds_source_{$id}_0");
  $locked = $locked || $feed_obj->progressImporting() !== FEEDS_BATCH_COMPLETE;
  $locked = $locked || $feed_obj->progressClearing() !== FEEDS_BATCH_COMPLETE;

  if (!$locked) {
    drush_print(t('Feed is not locked.'));
    return;
  }

  // Reset the stored progress state, then drop the lock row.
  db_update('feeds_source')
    ->condition('id', $id)
    ->fields(array('state' => FALSE))
    ->execute();
  db_delete('semaphore')
    ->condition('name', "feeds_source_{$id}_0")
    ->execute();
  drush_print(t('Feed is unlocked.'));
}
#!/bin/bash
#
# import-a-feed.sh - run one Feeds import in batches through drush.
#
# Arguments:
#   $1 site                       drush site alias
#   $2 feed_import_limit_default  default batch size (accepted, not used here)
#   $3 feed_import_limit          batch size, only reported in messages
#   $4 feed_import_limit_var      batch size variable name (accepted, not used here)
#   $5 feed_import_max_loop       maximum number of import batches to run
#   $6 feed                       feed id passed to drush as --feed-id
#
# If the feed does not report "Import complete" at start, it is unlocked
# first. feeds-import is then called until feeds-state reports
# "Import complete" or the batch limit is reached.

if [ "$#" -lt 6 ]; then
  echo "Usage: $0 site limit_default limit limit_var max_loop feed_id" >&2
  exit 1
fi

site=$1
batch_size=$3
max_loop=$5
feed=$6

# The loop test below needs an integer; fail early with a clear message.
if ! [[ $max_loop =~ ^-?[0-9]+$ ]]; then
  echo "feed_import_max_loop must be an integer, got '$max_loop'" >&2
  exit 1
fi

# Run a drush command against the site with --strict=0 appended.
# An empty site is dropped instead of being passed as an empty argument.
run_drush() {
  drush ${site:+"$site"} "$@" --strict=0
}

# Print a message and record it with the feeds-log drush command.
log_message() {
  echo "$1"
  run_drush feeds-log --m="$1"
}

number_loop=0
output=$(run_drush feeds-state --feed-id="$feed")
if [ "$output" = "Import complete" ]; then
  log_message "Starting a new import for $feed"
else
  # Not complete: report the state and unlock the feed before importing.
  m="$output for $feed"
  output=$(run_drush feeds-unlock --feed-id="$feed")
  log_message "$m: $output"
fi

while [ "$number_loop" -lt "$max_loop" ]; do
  number_loop=$((number_loop + 1))
  echo -n "Batch #$number_loop at batch size $batch_size: "
  # Any captured output from feeds-import is treated as a problem report.
  output=$(run_drush feeds-import --feed-id="$feed")
  if [ -n "$output" ]; then
    echo "Errors or warning.."
  else
    echo "Good!"
  fi
  output=$(run_drush feeds-state --feed-id="$feed")
  if [ "$output" = "Import complete" ]; then
    log_message "Import complete for $feed"
    break
  fi
done

# Batch limit reached without completing: report the last known state.
if [ "$output" != "Import complete" ]; then
  log_message "$output for $feed. $number_loop batches are run at batch size $batch_size"
fi
#!/bin/bash
#
# import-all-daily-feeds.sh - import a fixed list of feeds, one after another.
#
# Raises the feed import batch size variable, runs import-a-feed.sh for each
# feed, and restores the default batch size when the script exits.

site="@some.drush.alias"
# default feed import batch size is 50
feed_import_limit_default=50
# Your desired feed import batch size
feed_import_limit=80
# feed import batch size variable name
feed_import_limit_var="feeds_process_limit"
# To prevent from looping indefinitely
feed_import_max_loop=25
# Feeds to import, in order
feeds=(feed_1_id feed_2_id)

# Set back default feed import batch size.
restore_limit() {
  drush "$site" vset "$feed_import_limit_var" "$feed_import_limit_default" --strict=0
}
# Restore on every exit path so an interrupted run does not leave the raised
# batch size in place.
trap restore_limit EXIT
trap 'exit 1' HUP INT TERM

# Set your desired feed import batch size
drush "$site" vset "$feed_import_limit_var" "$feed_import_limit" --strict=0

for feed in "${feeds[@]}"; do
  /path/to/import-a-feed.sh "$site" "$feed_import_limit_default" "$feed_import_limit" "$feed_import_limit_var" "$feed_import_max_loop" "$feed"
done

Turn off Periodic import in the Feeds Import UI. Extend the drush_feeds_import module with more drush commands (drush_feeds_import.drush.inc):

  • See import progress: drush feeds-state --feed-id=feed_importer_1
  • Add watchdog (type:Feeds): drush feeds-log --m="some message"
  • Unlock a feed if it's locked: drush feeds-unlock --feed-id=feed_importer_1

Use shell scripts to run large imports on a schedule:
  • import-a-feed.sh starts a feed import. If the feed is locked because import progress or clear progress is not completed, the feed will be unlocked and a whole new import will be started
  • import-all-daily-feeds.sh imports all feeds, one after another. Each feed is imported until it is complete or the maximum number of batches is reached. import-all-daily-feeds.sh is meant to be run from crontab once a day.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment