Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
SQL script for properly clearing Alfresco unused audit data / property values (NOT node properties, but the property tables used for AuditComponent and AttributeService functionality) for PostgreSQL / MySQL.NOTE: Only run when the Alfresco Repository is NOT running as values kept in in-memory caches will otherwise result in constraint violations.
set autocommit=0;
begin;
-- get all active references to alf_prop_root
create temporary table temp1 (id bigint(20), index (id));
insert into temp1 select disabled_paths_id as id from alf_audit_app;
insert into temp1 select audit_values_id as id from alf_audit_entry;
insert into temp1 select prop1_id as id from alf_prop_unique_ctx;
-- determine the obsolete entries from alf_prop_root
create temporary table temp2 (id bigint(20), index (id));
insert into temp2 select alf_prop_root.id as id from alf_prop_root left join temp1 on temp1.id = alf_prop_root.id where temp1.id is null;
-- clear alf_prop_root which cascades DELETE to alf_prop_link
delete from alf_prop_root where id in (select id from temp2);
-- cleanup temporary structures
drop table temp1;
drop table temp2;
-- get all active references to alf_prop_value
create temporary table temp1 (id bigint(20), index (id));
insert into temp1 select id from alf_prop_value where id in (select app_name_id from alf_audit_app);
insert into temp1 select audit_user_id as id from alf_audit_entry;
insert into temp1 select key_prop_id as id from alf_prop_link;
insert into temp1 select value_prop_id as id from alf_prop_link;
insert into temp1 select value1_prop_id as id from alf_prop_unique_ctx;
insert into temp1 select value2_prop_id as id from alf_prop_unique_ctx;
insert into temp1 select value3_prop_id as id from alf_prop_unique_ctx;
-- determine the obsolete entries from alf_prop_value
create temporary table temp2 (id bigint(20), index (id));
insert into temp2 select alf_prop_value.id as id from alf_prop_value left join temp1 on alf_prop_value.id = temp1.id where temp1.id is null;
-- clear the obsolete entries
delete from alf_prop_value where id in (select id from temp2);
-- cleanup temporary structures
drop table temp1;
drop table temp2;
-- find and clear obsoleted serialized values
create temporary table temp1 (id bigint(20), index (id));
insert into temp1 select alf_prop_serializable_value.id as id from alf_prop_serializable_value left join alf_prop_value on alf_prop_value.long_value = alf_prop_serializable_value.id and alf_prop_value.persisted_type = 4 where alf_prop_value.id is null;
delete from alf_prop_serializable_value where id IN (select id from temp1);
-- find and clear obsoleted string values
create temporary table temp2 (id bigint(20), index (id));
insert into temp2 select alf_prop_string_value.id as id from alf_prop_string_value left join alf_prop_value on alf_prop_value.long_value = alf_prop_string_value.id and alf_prop_value.persisted_type in (3,5,6) where alf_prop_value.id is null;
delete from alf_prop_string_value where id in (select id from temp2);
-- find and clear obsoleted double values
create temporary table temp3 (id bigint(20), index (id));
insert into temp3 select alf_prop_double_value.id as id from alf_prop_double_value left join alf_prop_value on alf_prop_value.long_value = alf_prop_double_value.id and alf_prop_value.persisted_type = 2 where alf_prop_value.id is null;
delete from alf_prop_double_value where id in (select id from temp3);
-- cleanup temporary structures
drop table temp1;
drop table temp2;
drop table temp3;
commit;
begin;
-- get all active references to alf_prop_root
create temp table temp1 as select disabled_paths_id as id from alf_audit_app;
create index idx_tmp1_id on temp1(id);
insert into temp1 select audit_values_id from alf_audit_entry;
insert into temp1 select prop1_id from alf_prop_unique_ctx;
-- determine the obsolete entries from alf_prop_root
create temp table temp2 as select alf_prop_root.id from alf_prop_root left join temp1 on temp1.id = alf_prop_root.id where temp1.id is null;
create index idx_tmp2_id on temp2(id);
-- clear alf_prop_root which cascades DELETE to alf_prop_link
delete from alf_prop_root where id in (select id from temp2);
-- cleanup temporary structures
drop index idx_tmp1_id;
drop index idx_tmp2_id;
drop table temp1;
drop table temp2;
-- get all active references to alf_prop_value
create temp table temp1 as select id from alf_prop_value where id in (select app_name_id from alf_audit_app);
create index idx_tmp1_id on temp1(id);
insert into temp1 select audit_user_id from alf_audit_entry;
insert into temp1 select key_prop_id from alf_prop_link;
insert into temp1 select value_prop_id from alf_prop_link;
insert into temp1 select value1_prop_id from alf_prop_unique_ctx;
insert into temp1 select value2_prop_id from alf_prop_unique_ctx;
insert into temp1 select value3_prop_id from alf_prop_unique_ctx;
-- determine the obsolete entries from alf_prop_value
create temp table temp2 as select alf_prop_value.id from alf_prop_value left join temp1 on alf_prop_value.id = temp1.id where temp1.id is null;
create index idx_tmp2_id on temp2(id);
-- clear the obsolete entries
delete from alf_prop_value where id in (select id from temp2);
-- cleanup temporary structures
drop index idx_tmp1_id;
drop index idx_tmp2_id;
drop table temp1;
drop table temp2;
-- find and clear obsoleted serialized values
create temp table temp1 as select alf_prop_serializable_value.id from alf_prop_serializable_value left join alf_prop_value on alf_prop_value.long_value = alf_prop_serializable_value.id and alf_prop_value.persisted_type = 4 where alf_prop_value.id is null;
create index idx_tmp1_id on temp1(id);
delete from alf_prop_serializable_value where id IN (select id from temp1);
-- find and clear obsoleted string values
create temp table temp2 as select alf_prop_string_value.id from alf_prop_string_value left join alf_prop_value on alf_prop_value.long_value = alf_prop_string_value.id and alf_prop_value.persisted_type in (3,5,6) where alf_prop_value.id is null;
create index idx_tmp2_id on temp2(id);
delete from alf_prop_string_value where id in (select id from temp2);
-- find and clear obsoleted double values
create temp table temp3 as select alf_prop_double_value.id from alf_prop_double_value left join alf_prop_value on alf_prop_value.long_value = alf_prop_double_value.id and alf_prop_value.persisted_type = 2 where alf_prop_value.id is null;
create index idx_tmp3_id on temp3(id);
delete from alf_prop_double_value where id in (select id from temp3);
-- cleanup temporary structures
drop index idx_tmp1_id;
drop index idx_tmp2_id;
drop index idx_tmp3_id;
drop table temp1;
drop table temp2;
drop table temp3;
commit;
-- trigger maintenance operations to optimize database
vacuum full;
analyze;
@mkpelletier

This comment has been minimized.

Show comment
Hide comment
@mkpelletier

mkpelletier Dec 3, 2015

Thanks for the script. This is a real life saver. Do you suppose there is any harm in putting a LIMIT on the SELECT/INSERT queries. I have an alf_prop_link table that is enormous, and the outer joins on this table takes an eternity.

mkpelletier commented Dec 3, 2015

Thanks for the script. This is a real life saver. Do you suppose there is any harm in putting a LIMIT on the SELECT/INSERT queries. I have an alf_prop_link table that is enormous, and the outer joins on this table takes an eternity.

@nettree

This comment has been minimized.

Show comment
Hide comment
@nettree

nettree May 5, 2017

Hi, can this SQL script be used on Alfresco 5.2? I am not sure whether it's equivalent to audit clear webscript? I can see from log when propTablesCleanupJobDetail scheduled job is running, there are two sql got executed.

alfresco/dbscripts/utility/org.hibernate.dialect.PostgreSQLDialect/CleanAlfPropTablesPostExec.sql and
alfresco/dbscripts/utility/org.hibernate.dialect.PostgreSQLDialect/CleanAlfPropTables.sql,
is the second one the same with what we have here?

nettree commented May 5, 2017

Hi, can this SQL script be used on Alfresco 5.2? I am not sure whether it's equivalent to audit clear webscript? I can see from log when propTablesCleanupJobDetail scheduled job is running, there are two sql got executed.

alfresco/dbscripts/utility/org.hibernate.dialect.PostgreSQLDialect/CleanAlfPropTablesPostExec.sql and
alfresco/dbscripts/utility/org.hibernate.dialect.PostgreSQLDialect/CleanAlfPropTables.sql,
is the second one the same with what we have here?

@AFaust

This comment has been minimized.

Show comment
Hide comment
@AFaust

AFaust Oct 18, 2017

@nettree Somehow I did not get notified about your message. The scripts now included in Alfresco by default are based on the scripts in this Gist, but have been significantly modified to work e.g. with batches. They are thus not comparable anymore.
Since the scripts included in Alfresco 5.2 (and the original scripts) have the drawback of trying to process everything in one large transaction, I have since developed (for a customer) an open source module that provides incremental cleanup capabilities.

Owner

AFaust commented Oct 18, 2017

@nettree Somehow I did not get notified about your message. The scripts now included in Alfresco by default are based on the scripts in this Gist, but have been significantly modified to work e.g. with batches. They are thus not comparable anymore.
Since the scripts included in Alfresco 5.2 (and the original scripts) have the drawback of trying to process everything in one large transaction, I have since developed (for a customer) an open source module that provides incremental cleanup capabilities.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment