Skip to content

Instantly share code, notes, and snippets.

@esimonetti
Last active October 23, 2018 23:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save esimonetti/669ed488e0e28c093de8b863a9bfa133 to your computer and use it in GitHub Desktop.
Save esimonetti/669ed488e0e28c093de8b863a9bfa133 to your computer and use it in GitHub Desktop.
The latests up to date project is located here: https://github.com/esimonetti/SugarActivityStreamPurger. CLI script to clean activity streams to keep only the last N months, and keep only activities of records that are still valid. A more aggressive and faster version of this script can be found here: https://gist.github.com/esimonetti/39f65396f…
<?php
// Enrico Simonetti
// enricosimonetti.com
//
// 2017-12-13 on Sugar 7.9.2.0
//
// CLI script to clean activity streams to keep only the last N months, and keep only activities of records that are still valid
//
// TEST, TEST, TEST AND USE AT YOUR OWN RISK!
//
// For big systems it is recommended to add the indexes below, and then remove them once completed. It is also recommended to start
// with a longer window of time (eg: --keep-months=24 ) to gradually reduce the number of records, and purging completely the soft deleted records from the database
// To add indexes on MySQL:
// create index idx_act_del_type_entered on activities (deleted, activity_type, date_entered);
// create index idx_actuser_del on activities_users (deleted);
// create index idx_actuser_actid on activities_users (activity_id);
// To remove indexes:
// drop index idx_act_del_type_entered on activities;
// drop index idx_actuser_del on activities_users;
// drop index idx_actuser_actid on activities_users;
function usage($error = '') {
if (!empty($error)) print(PHP_EOL . 'Error: ' . $error . PHP_EOL);
print(' php ' . __FILE__ . ' --instance /full/path [--keep-months=6] [--deep-clean] [--force-db-delete]' . PHP_EOL);
exit(1);
}
// only allow CLI
$sapi_type = php_sapi_name();
if (substr($sapi_type, 0, 3) != 'cli') {
die(__FILE__ . ' is CLI only.');
}
// get command line params
$o = getopt('', array('instance:', 'keep-months::', 'deep-clean', 'force-db-delete'));
if (!$o) usage();
// find directory
if (!empty($o['instance']) && is_dir($o['instance'])) {
print('Debug: Entering directory ' . $o['instance'] . PHP_EOL);
chdir($o['instance']);
} else {
chdir(dirname(__FILE__));
}
if (!file_exists('config.php') || !file_exists('sugar_version.php')) {
usage('The provided directory is not a Sugar system');
}
// sugar basic setup
define('sugarEntry', true);
require_once('include/entryPoint.php');
if (extension_loaded('xdebug')) {
echo 'Xdebug is enabled on this system. It is highly recommended to disable Xdebug on PHP CLI before running this script. Xdebug will cause unwanted slowness.'.PHP_EOL;
}
// temporarily stop xdebug, xhprof and tideways if enabled
if (function_exists('xdebug_disable')) {
xdebug_disable();
}
if (function_exists('xhprof_disable')) {
xhprof_disable();
xhprof_sample_disable();
}
if (function_exists('tideways_disable')) {
tideways_disable();
}
if (empty($current_language)) {
$current_language = $sugar_config['default_language'];
}
$app_list_strings = return_app_list_strings_language($current_language);
$app_strings = return_application_language($current_language);
$mod_strings = return_module_language($current_language, 'Administration');
global $current_user;
$current_user = BeanFactory::getBean('Users');
$current_user->getSystemUser();
echo 'Trimming activity streams...' . PHP_EOL;
$results = array(
'activities' => 0,
'activities_users' => 0,
'comments' => 0,
);
$db = DBManagerFactory::getInstance();
// months to keep
$months_to_keep = 12;
if (!empty($o['keep-months'])) {
if ($o['keep-months'] > 0) {
$months_to_keep = $o['keep-months'];
}
if ($o['keep-months'] == 'none') {
$months_to_keep = 0;
}
}
echo 'The activity stream records older than ' . $months_to_keep . ' months, will be deleted.' . PHP_EOL;
$force_db_delete = false;
if (isset($o['force-db-delete'])) {
$force_db_delete = true;
echo 'The records will be completely REMOVED from the database.' . PHP_EOL;
} else {
echo 'The records will be soft deleted ( deleted = 1 ).' . PHP_EOL;
}
$deep_clean = false;
if (isset($o['deep-clean'])) {
$deep_clean = true;
echo 'Deep record by record cleaning is going to be performed.' . PHP_EOL;
}
$line = readline('Based on the above warnings, do you want to proceed? (Type yes/no and enter)' . PHP_EOL);
if (strtolower($line) != 'yes') {
die('Script interrupted' . PHP_EOL);
}
$start_time = microtime(true);
// find all the activities items we should not touch as they are related to a comment
$activities_to_keep = array();
$b1 = $db->getConnection()->createQueryBuilder();
$b1->select(array('parent_id'))->from('comments');
$b1->where("deleted = '0'");
echo 'Executing '.$b1->getSQL().PHP_EOL;
$res = $b1->execute();
while ($row = $res->fetch()) {
// before adding them to the list, check if the actual activity's parent record is not deleted, otherwise they can go
$activity = BeanFactory::getBean('Activities', $row['parent_id']);
if (!empty($activity->id) && !empty($activity->parent_id) && !empty($activity->parent_type)) {
$bean = BeanFactory::getBean($activity->parent_type, $activity->parent_id);
if (!empty($bean->id)) {
$activities_to_keep[$row['parent_id']] = $row['parent_id'];
}
}
echo '.';
}
echo PHP_EOL;
// delete every activities record older than N months, without comments
$date_entered_keep = gmdate('Y-m-d H:i:s', mktime(date('H'), date('i'), date('s'), date('m')-$months_to_keep, date('d'),date('Y')));
$act_candidates_for_delete = array();
// get all the activities that are not a user's post, that are too old
$b1 = $db->getConnection()->createQueryBuilder();
$b1->select(array('id'))->from('activities');
$b1->where("deleted = '0'");
$b1->andWhere('activity_type != '.$b1->createPositionalParameter('post'));
$b1->andWhere('date_entered < '.$b1->createPositionalParameter($date_entered_keep));
echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$res = $b1->execute();
while ($row = $res->fetch()) {
$act_candidates_for_delete[$row['id']] = $row['id'];
echo '.';
}
echo PHP_EOL;
// now get all the activities that are post, and check if their parent record is deleted
$b1 = $db->getConnection()->createQueryBuilder();
$b1->select(array('id', 'parent_id', 'parent_type'))->from('activities');
$b1->where("deleted = '0'");
$b1->andWhere('activity_type = '.$b1->createPositionalParameter('post'));
echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$res = $b1->execute();
while ($row = $res->fetch()) {
if (!empty($row['id']) && !empty($row['parent_id']) && !empty($row['parent_type'])) {
// see if i can access the bean
$bean = BeanFactory::getBean($row['parent_type'], $row['parent_id']);
if (empty($bean->id)) {
// can delete
$act_candidates_for_delete[$row['id']] = $row['id'];
}
echo '.';
}
}
echo PHP_EOL;
if (!empty($act_candidates_for_delete)) {
foreach ($act_candidates_for_delete as $activity_id) {
// if it is not something we need to keep...
if (!in_array($activity_id, $activities_to_keep)) {
if (!$force_db_delete) {
// mark as deleted the matching activities_users
$b1 = $db->getConnection()->createQueryBuilder();
$b1->update('activities_users');
$b1->set('deleted', '1');
$b1->where('activity_id = ' . $b1->createPositionalParameter($activity_id));
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$b1->execute();
// mark as deleted this activity
$b1 = $db->getConnection()->createQueryBuilder();
$b1->update('activities');
$b1->set('deleted', '1');
$b1->where('id = ' . $b1->createPositionalParameter($activity_id));
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$b1->execute();
} else {
// delete the matching activities_users
$b1 = $db->getConnection()->createQueryBuilder();
$b1->delete('activities_users');
$b1->where('activity_id = ' . $b1->createPositionalParameter($activity_id));
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$b1->execute();
// delete this activity
$b1 = $db->getConnection()->createQueryBuilder();
$b1->delete('activities');
$b1->where('id = ' . $b1->createPositionalParameter($activity_id));
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$b1->execute();
}
$results['activities_users']++;
$results['activities']++;
echo '..';
}
}
}
echo PHP_EOL;
if ($deep_clean) {
// find comments without a valid activities record, that can be deleted
$b1 = $db->getConnection()->createQueryBuilder();
$b1->select(array('com.id'))->from('comments', 'com');
$b1->leftJoin('com', 'activities', 'act', 'com.parent_id = act.id');
$b1->where("com.deleted = '0'");
$b1->andWhere('act.id is null');
echo 'Executing '.$b1->getSQL().PHP_EOL;
$res = $b1->execute();
while ($row = $res->fetch()) {
if (!$force_db_delete) {
// mark as deleted the comments without activities
$b1 = $db->getConnection()->createQueryBuilder();
$b1->update('comments');
$b1->set('deleted', '1');
$b1->where('id = ' . $b1->createPositionalParameter($row['id']));
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$b1->execute();
} else {
// delete the comments without activities
$b1 = $db->getConnection()->createQueryBuilder();
$b1->delete('comments');
$b1->where('id = ' . $b1->createPositionalParameter($row['id']));
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$b1->execute();
}
$results['comments']++;
echo '.';
}
echo PHP_EOL;
// find activities_users without an activity_id, that can be deleted
$act_users_to_delete = array();
$b1 = $db->getConnection()->createQueryBuilder();
$b1->select(array('au.id'))->from('activities_users', 'au');
$b1->leftJoin('au', 'activities', 'act', 'au.activity_id = act.id');
$b1->where("au.deleted = '0'");
$b1->andWhere('act.id is null');
echo 'Executing '.$b1->getSQL().PHP_EOL;
$res = $b1->execute();
while ($row = $res->fetch()) {
$act_users_to_delete[$row['id']] = $row['id'];
echo '.';
}
echo PHP_EOL;
// find activities_users with a deleted parent_type/parent_id, that can be deleted
$b1 = $db->getConnection()->createQueryBuilder();
$b1->select(array('id', 'parent_id', 'parent_type'))->from('activities_users');
$b1->where("deleted = '0'");
echo 'Executing '.$b1->getSQL().PHP_EOL;
$res = $b1->execute();
while ($row = $res->fetch()) {
if (!empty($row['id']) && !empty($row['parent_id']) && !empty($row['parent_type'])) {
// see if i can access the bean
$bean = BeanFactory::getBean($row['parent_type'], $row['parent_id']);
if (empty($bean->id)) {
// can delete
$act_users_to_delete[$row['id']] = $row['id'];
}
echo '.';
}
}
echo PHP_EOL;
if (!empty($act_users_to_delete)) {
foreach ($act_users_to_delete as $act_users_id) {
if (!$force_db_delete) {
// mark as deleted the matching activities_users
$b1 = $db->getConnection()->createQueryBuilder();
$b1->update('activities_users');
$b1->set('deleted', '1');
$b1->where('id = ' . $b1->createPositionalParameter($act_users_id));
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$b1->execute();
} else {
// delete the matching activities_users
$b1 = $db->getConnection()->createQueryBuilder();
$b1->delete('activities_users');
$b1->where('id = ' . $b1->createPositionalParameter($act_users_id));
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL;
$b1->execute();
}
$results['activities_users']++;
echo '.';
}
}
echo PHP_EOL;
}
echo 'Summary of records removed: ' . PHP_EOL;
foreach ($results as $heading => $value) {
echo $value . ' ' . $heading . PHP_EOL;
}
echo 'Note that the activities_users table summary might now be fully accurate, as for every record there can be multiple user activities records deleted' . PHP_EOL;
echo PHP_EOL;
print('Completed in ' . (int)(microtime(true) - $start_time) . ' seconds.' . PHP_EOL);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment